├── video.mp4 ├── requirements.txt ├── main_latest.py ├── sidebar.txt ├── README.md ├── .devcontainer └── devcontainer.json ├── palm_api.py ├── backend.py ├── llm_utils.py ├── yt_utils.py └── main.py /video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-I-nstein/YouTube-TLDR/HEAD/video.mp4 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | langchain 3 | assemblyai 4 | pytube 5 | youtube-transcript-api 6 | weaviate-client 7 | openai 8 | tiktoken 9 | google-generativeai 10 | -------------------------------------------------------------------------------- /main_latest.py: -------------------------------------------------------------------------------- 1 | # imports 2 | 3 | import streamlit as st 4 | from backend import get_summary, get_answer 5 | 6 | # page config 7 | 8 | st.set_page_config( 9 | page_title = 'YouTube Q & A', 10 | layout = 'wide', 11 | initial_sidebar_state = 'expanded' 12 | ) 13 | 14 | # side bar 15 | 16 | with st.sidebar: 17 | with open('sidebar.txt') as fb: 18 | text = fb.read() 19 | st.write(text) 20 | 21 | # main page 22 | 23 | st.title(":video_camera: YouTube Video Q and A") 24 | 25 | # UI Elements 26 | 27 | st.header("Sorry! We're working on improving this site! Here's a demo video in the meanwhile (Thanks to Weaviate!)") 28 | st.video("video.mp4") 29 | -------------------------------------------------------------------------------- /sidebar.txt: -------------------------------------------------------------------------------- 1 | Hi, this site can answer questions based on the link to the YouTube video provided by the user. The site also tells you where exactly (time stamp) on the video the question is answered. 2 | 3 | Instructions: 4 | 1. Paste the link of a YouTube video in the textbox and click 'Process Video'. 5 | 2. Once the video gets processed enter your query and click on 'Get Answer' 6 | 3. The answer appears with the related time stamp. 7 | 4. Click on 'Go To Timestamp' to go to the section of the video where the question is answered. 8 | 5. Refresh the page befor you paste a new link to clear cache. 9 | 10 | Technology used: 11 | 1. LangChain: To integrate LLM models and vector databases to create a pipeine. 12 | 2. AssemblyAI: To generate transcript for videos that do not come with captions. 13 | 3. Weaviate: To store vector data. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YouTube-TLDR 2 | Using LLM to summarize and enhance media consumption experience. 3 | 4 | Hi, this site can answer questions based on the link to the YouTube video provided by the user. The site also tells you where exactly (time stamp) on the video the question is answered. 5 | 6 | ## Instructions: 7 | 1. Paste the link of a YouTube video in the textbox and click 'Process Video'. 8 | 2. Once the video gets processed enter your query and click on 'Get Answer' 9 | 3. The answer appears with the related time stamp. 10 | 4. Click on 'Go To Timestamp' to go to the section of the video where the question is answered. 11 | 5. Refresh the page befor you paste a new link to clear cache. 12 | 13 | ## Technology used: 14 | 1. LangChain: To integrate OpenAI models and Weaviate vector database. 15 | 2. AssemblyAI: To generate transcript for videos that do not come with captions. 16 | 3. Weaviate: To store vector data. 17 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Python 3", 3 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile 4 | "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", 5 | "customizations": { 6 | "codespaces": { 7 | "openFiles": [ 8 | "README.md", 9 | "main.py" 10 | ] 11 | }, 12 | "vscode": { 13 | "settings": {}, 14 | "extensions": [ 15 | "ms-python.python", 16 | "ms-python.vscode-pylance" 17 | ] 18 | } 19 | }, 20 | "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y str: 13 | return "PALM API" 14 | 15 | def _call( 16 | self, 17 | prompt: str, 18 | stop: Optional[List[str]] = None, 19 | run_manager: Optional[CallbackManagerForLLMRun] = None, 20 | **kwargs: Any, 21 | ) -> str: 22 | if stop is not None: 23 | raise ValueError("stop kwargs are not permitted.") 24 | 25 | models = [m for m in palm.list_models() if 'generateText' in m.supported_generation_methods] 26 | model = models[0].name 27 | 28 | completion = palm.generate_text( 29 | model=model, 30 | prompt=prompt, 31 | temperature=0, 32 | # The maximum length of the response 33 | max_output_tokens=800, 34 | ) 35 | 36 | return completion.result 37 | -------------------------------------------------------------------------------- /backend.py: -------------------------------------------------------------------------------- 1 | # imports 2 | import os 3 | import glob 4 | import json 5 | from llm_utils import llm_summary, llm_answer 6 | from yt_utils import get_subtitles 7 | import weaviate as weav 8 | 9 | WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY') 10 | WEAVIATE_URL = os.getenv('WEAVIATE_URL') 11 | 12 | # clean up 13 | 14 | def cleaner(): 15 | if not os.path.exists('output'): 16 | os.makedirs('output') 17 | else: 18 | files = glob.glob('output/*') 19 | for f in files: 20 | os.remove(f) 21 | 22 | client = weav.Client( 23 | url=WEAVIATE_URL, 24 | auth_client_secret=weav.AuthApiKey(api_key=WEAVIATE_API_KEY), 25 | ) 26 | for value in client.data_object.get()['objects']: 27 | client.data_object.delete(value['id']) 28 | 29 | # function to generate summary of a video 30 | 31 | def get_summary(video_link): 32 | cleaner() 33 | status, data = get_subtitles(video_link) 34 | if status == 'success': 35 | status, data = llm_summary(data) 36 | return status, data 37 | 38 | 39 | # function to get an answer 40 | 41 | def get_answer(question): 42 | with open('output/refined_data.json') as fb: 43 | subtitles = json.load(fb) 44 | status, data = llm_answer(question, subtitles) 45 | return status, data -------------------------------------------------------------------------------- /llm_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import streamlit as st 4 | # from palm_api import PALM 5 | from langchain.llms import OpenAI 6 | from langchain.vectorstores import Weaviate 7 | from langchain.chat_models import ChatOpenAI 8 | from langchain import PromptTemplate, LLMChain 9 | from langchain.embeddings.openai import OpenAIEmbeddings 10 | 11 | 12 | 13 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') 14 | WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY') 15 | WEAVIATE_URL = os.getenv('WEAVIATE_URL') 16 | 17 | @st.cache_resource 18 | def save_embeddings(captions): 19 | 20 | texts = [] 21 | metadata = [] 22 | 23 | for record in captions: 24 | texts.append(record['text']) 25 | metadata.append({'start': record['start']}) 26 | 27 | embeddings = OpenAIEmbeddings() 28 | weaviate = Weaviate.from_texts( 29 | texts, 30 | embeddings, 31 | metadatas = metadata, 32 | weaviate_url = WEAVIATE_URL 33 | ) 34 | return weaviate 35 | 36 | 37 | def parseNumber(text): 38 | newText = "" 39 | text = text.replace('\n', ' ') 40 | for i in text: 41 | if (i >= '0' and i <= '9') or i == '.': 42 | newText += i 43 | return math.floor(float(newText)) 44 | 45 | def llm_summary(subtitles): 46 | template = """Subtitles are enclosed in ###. Summarize the subtitles. 47 | ### 48 | {srt} 49 | ### 50 | Answer: 51 | """ 52 | try: 53 | prompt = PromptTemplate(template=template, input_variables=["srt"]) 54 | llm = OpenAI(openai_api_key=OPENAI_API_KEY) 55 | # llm = PALM() 56 | llm_chain = LLMChain(prompt=prompt, llm=llm) 57 | summary = llm_chain.run(subtitles) 58 | except Exception as e: 59 | return 'fail', 'Token limit exceeded.' 60 | else: 61 | return 'success', summary 62 | 63 | def llm_answer(question, captions): 64 | try: 65 | weaviate = save_embeddings(captions) 66 | docs = weaviate.similarity_search(question, k=1) 67 | data = docs[0].page_content 68 | except Exception as e: 69 | return 'fail', 'Could not extract transcript. Please try a different video.' 70 | template = """\ 71 | Answer a question when the question and the relevant data is given.\ 72 | Relevant data: {data}\ 73 | Question: {question}\ 74 | Answer: 75 | """ 76 | try: 77 | prompt = PromptTemplate(template=template, input_variables=["data", "question"]) 78 | llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model='gpt-3.5-turbo-16k-0613') 79 | # llm = PALM() 80 | llm_chain = LLMChain(prompt=prompt, llm=llm) 81 | output = llm_chain.run({'data':data, 'question':question}) 82 | timestamp = int(docs[0].metadata['start']) 83 | except Exception as e: 84 | return 'fail', 'Token limit exceeded.' 85 | return 'success', (output, timestamp) 86 | -------------------------------------------------------------------------------- /yt_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import assemblyai as aai 4 | from pytube import YouTube 5 | from youtube_transcript_api import YouTubeTranscriptApi, _errors 6 | 7 | def parse(data, dataType, durationAnchor): 8 | 9 | refined_data = [] 10 | 11 | start = None 12 | duration = 0 13 | EOL = False 14 | text = '' 15 | flag = False 16 | 17 | for splits in data: 18 | 19 | if not start: 20 | if dataType == 'subtitles': 21 | start = splits['start'] 22 | else: 23 | start = splits['start'] / 1000 24 | 25 | EOL = False 26 | text += f" {parseText(splits['text'])}" 27 | 28 | if dataType == 'subtitles': 29 | duration += splits['duration'] 30 | else: 31 | duration += splits['end'] - splits['start'] 32 | 33 | if '.' in splits['text']: 34 | EOL = True 35 | 36 | if dataType == 'subtitles': 37 | flag = duration > durationAnchor 38 | else: 39 | flag = duration > durationAnchor and EOL 40 | 41 | if flag: 42 | refined_data.append({'text': text, 'start': start}) 43 | text = '' 44 | EOL = False 45 | start = None 46 | duration = 0 47 | 48 | with open(f'output/refined_data.json', 'w') as f: 49 | f.write(json.dumps(refined_data)) 50 | 51 | def parseText(text): 52 | 53 | newText = "" 54 | text = text.replace('\n', ' ') 55 | for i in text: 56 | if i.isalnum() or i == " ": 57 | newText += i 58 | return newText 59 | 60 | def generate_subtitles(link): 61 | 62 | try: 63 | youtubeObject = YouTube(link) 64 | if youtubeObject.length > 360: 65 | return 'fail', 'Please try with a video that is shorter than 5 minutes.' 66 | youtubeObject = youtubeObject.streams.get_audio_only() 67 | youtubeObject.download('output/') 68 | except Exception as e: 69 | return 'fail', 'Please try again with a different link.' 70 | else: 71 | try: 72 | aai.settings.api_key = os.getenv('ASSEMBLYAI_API_KEY') 73 | transcriber = aai.Transcriber() 74 | transcript = transcriber.transcribe('output/' + youtubeObject.default_filename) 75 | except Exception as e: 76 | return 'fail', 'AssemblyAI token limit exceeded.' 77 | else: 78 | parse(transcript.json_response['words'], 'transcript', 20000) 79 | return 'success', transcript.text 80 | 81 | def get_subtitles(link): 82 | 83 | try: 84 | id = link[-11:] 85 | srt = YouTubeTranscriptApi.get_transcript(id) 86 | except _errors.TranscriptsDisabled as e: 87 | status, data = generate_subtitles(link) 88 | if status != 'success': 89 | return 'fail', data 90 | else: 91 | return status, data 92 | except Exception as e: 93 | return 'fail', 'Please try again with a different link.' 94 | else: 95 | parse(srt, 'subtitles', 20) 96 | subtitles = "" 97 | count = 0 98 | for parts in srt: 99 | subString = parseText(parts['text']) 100 | subtitles = f"{subtitles} {subString}" 101 | status = 'success' 102 | return status, subtitles -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # imports 2 | 3 | import streamlit as st 4 | from backend import get_summary, get_answer 5 | 6 | # page config 7 | 8 | st.set_page_config( 9 | page_title = 'YouTube Q & A', 10 | layout = 'wide', 11 | initial_sidebar_state = 'expanded' 12 | ) 13 | 14 | # side bar 15 | 16 | with st.sidebar: 17 | with open('sidebar.txt') as fb: 18 | text = fb.read() 19 | st.write(text) 20 | 21 | # main page 22 | 23 | st.title(":video_camera: YouTube Video Q and A") 24 | 25 | # program variables and functions 26 | 27 | process_video = None 28 | process_question = None 29 | go_to_timestamp = None 30 | 31 | @st.cache_data 32 | def call_get_summary(video_link): 33 | return get_summary(video_link) 34 | @st.cache_data 35 | def call_get_answer(question): 36 | return get_answer(question) 37 | 38 | # session state management 39 | 40 | if 'process_video_clicked' not in st.session_state: 41 | st.session_state.process_video_clicked = False 42 | if 'process_question_clicked' not in st.session_state: 43 | st.session_state.process_question_clicked = False 44 | if 'go_to_timestamp_clicked' not in st.session_state: 45 | st.session_state.go_to_timestamp_clicked = False 46 | if 'timestamp' not in st.session_state: 47 | st.session_state.timestamp = 0 48 | 49 | def process_video_callback(): 50 | st.session_state.process_video_clicked = True 51 | st.session_state.process_question_clicked = False 52 | def process_question_callback(): 53 | st.session_state.process_question_clicked = True 54 | st.session_state.go_to_timestamp_clicked = False 55 | def go_to_timestamp_callback(): 56 | st.session_state.go_to_timestamp_clicked = True 57 | 58 | 59 | # video link input container 60 | 61 | st.divider() 62 | container_1 = st.container() 63 | with container_1: 64 | st.text('Paste the link to the YouTube video here: ') 65 | col_1, col_2 = st.columns([0.8, 0.2]) 66 | with col_1: 67 | video_link = st.text_input( 68 | label = 'Video Link', 69 | label_visibility = 'collapsed', 70 | placeholder = 'https://www.youtube.com/watch?v=0CmtDk-joT4', 71 | #value = 'https://www.youtube.com/watch?v=0CmtDk-joT4' 72 | ) 73 | with col_2: 74 | process_video = st.button('Process Video', on_click=process_video_callback) 75 | 76 | # processed output container 77 | 78 | if process_video or st.session_state.process_video_clicked: 79 | if video_link == "": 80 | st.error("Please provide a valid link!") 81 | exit(0) 82 | st.session_state.process_video_clicked = True 83 | st.divider() 84 | container_2 = st.container() 85 | with container_2: 86 | col_1, col_2 = st.columns([0.6, 0.4]) 87 | with col_1: 88 | if 'shorts' in video_link: 89 | video_link = video_link.replace('shorts', 'embed') 90 | video = st.video(video_link, start_time = st.session_state.timestamp) 91 | with col_2: 92 | with st.spinner('Generating summary...'): 93 | status, data = call_get_summary(video_link) 94 | if status != 'success': 95 | st.error(data) 96 | exit(0) 97 | else: 98 | st.text('Summary of the video:') 99 | summary_box = st.text_area( 100 | label = 'Summary', 101 | label_visibility = 'collapsed', 102 | value = data, 103 | disabled = True, 104 | height = 300 105 | ) 106 | st.divider() 107 | st.text('Type your question here: ') 108 | col_1, col_2 = st.columns([0.8, 0.2]) 109 | with col_1: 110 | question = st.text_input( 111 | label = 'Question', 112 | label_visibility = 'collapsed', 113 | ) 114 | with col_2: 115 | process_question = st.button('Get Answer', on_click=process_question_callback) 116 | 117 | # answer container 118 | 119 | if process_question or st.session_state.process_question_clicked: 120 | container_3 = st.container() 121 | with container_3: 122 | with st.spinner('Finding answer...'): 123 | status, data = call_get_answer(question) 124 | if status != 'success': 125 | st.error(data) 126 | exit(0) 127 | else: 128 | answer, st.session_state.timestamp = data[0], data[1] 129 | st.text('The answer to your question: ') 130 | col_1, col_2 = st.columns([0.8, 0.2]) 131 | with col_1: 132 | answer_box = st.text_area( 133 | label = 'Answer', 134 | label_visibility = 'collapsed', 135 | value = answer, 136 | disabled = True, 137 | height = 300 138 | ) 139 | with col_2: 140 | go_to_timestamp = st.button('Go To Timestamp', on_click=go_to_timestamp_callback) 141 | --------------------------------------------------------------------------------