├── .gitignore ├── LICENSE ├── README.md └── src ├── app.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Yazılım Academy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FastAPI YouTube Transcript Summarizer 2 | 3 | This project is a FastAPI-based application that fetches YouTube video transcripts, summarizes them using OpenAI's GPT models (O1 PRO, O1-MINI) and Claude 3.5 SONNET, and returns the summary in a specified language. 4 | 5 | ## Prerequisites 6 | 7 | - Python 3.7 or newer 8 | - OpenAI API key 9 | 10 | ## Installation 11 | 12 | 1. **Clone the repository**: 13 | ```bash 14 | git clone https://github.com/yazilimacademy/sumryza-backend 15 | cd sumryza-backend/src 16 | ``` 17 | 18 | 2. **Install the required dependencies**: 19 | ```bash 20 | pip install fastapi uvicorn youtube-transcript-api openai python-dotenv 21 | ``` 22 | 23 | 3. **Set up your OpenAI API key**: 24 | - Create a `.env` file in the root of the project. 25 | - Add your OpenAI API key in the following format: 26 | 27 | ``` 28 | OPENAI_API_KEY=your_openai_api_key_here 29 | ``` 30 | 31 | 4. **Run the application**: 32 | Start the FastAPI server using Uvicorn: 33 | ```bash 34 | uvicorn app:app --reload 35 | ``` 36 | 37 | 5. **Access the application**: 38 | Once the server is running, you can access the API documentation at: 39 | ``` 40 | http://127.0.0.1:8000/docs 41 | ``` 42 | 43 | ## API Endpoints 44 | 45 | ### `GET /transcript` 46 | 47 | Fetches the transcript of a YouTube video and provides a summary in a specified language. 48 | 49 | #### Parameters: 50 | - `video_id` (str): The YouTube video ID (required). 51 | - `summary_language` (str): The preferred language for the summary (optional, default: "tr"). 52 | 53 | #### Example Request: 54 | ``` 55 | GET http://127.0.0.1:8000/transcript?video_id=VIDEO_ID&summary_language=en 56 | ``` 57 | 58 | ### Example Response: 59 | ```json 60 | { 61 | "summary": "This is the summarized content of the video." 62 | } 63 | ``` 64 | 65 | ## Notes: 66 | - The app tries ttis following to found a by more available transcript. All the application is iores for data deal and getting experiences. 67 | - The summary will be provided in the specified language using OpenAI help-making. 68 | - The parameters are video_id, summary_language, and responses as these want. 69 | - The project seek questions in this repose informative easd read application. Example testing informativolad videaltanals. 70 | 71 | ### A heartfelt thank you to my dear friends who supported me during Yazılım Academy's live broadcasts. 👇 72 | 73 | altudev 74 | HikmetMelikk 75 | merveeksi 76 | KardelRuveyda 77 | Taiizor 78 | k-celal 79 | serkutYILDIRIM 80 | nurullahnamal 81 | MSimsek07 82 | alihangudenoglu 83 | EmirhanKara 84 | ladrons 85 | EmreAka 86 | -------------------------------------------------------------------------------- /src/app.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from dotenv import load_dotenv 4 | from fastapi import FastAPI, HTTPException, Query 5 | from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound 6 | from fastapi.middleware.cors import CORSMiddleware 7 | import openai 8 | 9 | # Logging setup 10 | def setup_logging(): 11 | logger = logging.getLogger("app_logger") 12 | logger.setLevel(logging.DEBUG) 13 | 14 | console_handler = logging.StreamHandler() 15 | console_handler.setLevel(logging.DEBUG) 16 | console_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 17 | console_handler.setFormatter(console_format) 18 | 19 | file_handler = logging.FileHandler("app.log", encoding='utf-8') 20 | file_handler.setLevel(logging.INFO) 21 | file_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 22 | file_handler.setFormatter(file_format) 23 | 24 | logger.addHandler(console_handler) 25 | logger.addHandler(file_handler) 26 | 27 | return logger 28 | 29 | logger = setup_logging() 30 | 31 | app = FastAPI() 32 | 33 | app.add_middleware( 34 | CORSMiddleware, 35 | allow_origins=["*"], # In production, specify your domains 36 | allow_credentials=True, 37 | allow_methods=["*"], 38 | allow_headers=["*"], 39 | ) 40 | 41 | logger.info("Application starting and CORS middleware added.") 42 | 43 | # Load .env file 44 | load_dotenv() 45 | 46 | # Load OpenAI API key from environment variable 47 | openai_api_key = os.getenv("OPENAI_API_KEY") 48 | if not openai_api_key: 49 | logger.error("OpenAI API key is missing.") 50 | raise RuntimeError("OpenAI API key is missing.") 51 | 52 | try: 53 | openai.api_key = openai_api_key 54 | logger.info("OpenAI client initialized successfully.") 55 | except Exception as e: 56 | logger.error(f"Failed to initialize OpenAI client: {e}") 57 | raise RuntimeError(f"Failed to initialize OpenAI client: {e}") 58 | 59 | # Map language codes to English language names for better clarity in prompts 60 | language_map = { 61 | "en": "English", 62 | "tr": "Turkish", 63 | "es": "Spanish", 64 | "fr": "French", 65 | "de": "German", 66 | "hi": "Hindi", 67 | "ar": "Arabic", 68 | "pt": "Portuguese", 69 | "ru": "Russian", 70 | "ja": "Japanese", 71 | "zh-Hans": "Simplified Chinese" 72 | } 73 | 74 | # Fetch transcript function 75 | def fetch_transcript(video_id, preferred_languages): 76 | try: 77 | transcripts = YouTubeTranscriptApi.list_transcripts(video_id) 78 | logger.info("Transcripts listed successfully.") 79 | transcript_data = None 80 | 81 | # Try to find a manually created transcript in preferred languages 82 | for lang in preferred_languages: 83 | try: 84 | transcript_data = transcripts.find_transcript([lang]).fetch() 85 | logger.info(f"Manually created transcript found: Language = {lang}") 86 | break 87 | except NoTranscriptFound: 88 | logger.debug(f"No manually created transcript found for: {lang}") 89 | continue 90 | 91 | # Try to find a generated transcript in preferred languages 92 | if not transcript_data: 93 | for lang in preferred_languages: 94 | try: 95 | transcript_data = transcripts.find_generated_transcript([lang]).fetch() 96 | logger.info(f"Generated transcript found: Language = {lang}") 97 | break 98 | except NoTranscriptFound: 99 | logger.debug(f"No generated transcript found for: {lang}") 100 | continue 101 | 102 | # Fallback to any available transcript if none of the above worked 103 | if not transcript_data: 104 | try: 105 | first_transcript = next(iter(transcripts)) 106 | transcript_data = first_transcript.fetch() 107 | logger.info(f"Any available transcript found: Language = {first_transcript.language}") 108 | except StopIteration: 109 | logger.error("No transcripts found at all.") 110 | raise HTTPException(status_code=400, detail="No transcripts found.") 111 | return transcript_data 112 | except Exception as e: 113 | logger.error(f"Error fetching transcript: {e}") 114 | raise HTTPException(status_code=400, detail=str(e)) 115 | 116 | @app.get("/transcript") 117 | def get_transcript( 118 | video_id: str = Query(..., description="The YouTube video ID"), 119 | summary_language: str = Query("tr", description="Preferred summary language (e.g., 'en', 'tr', 'es')") 120 | ): 121 | logger.info(f"Transcript request received. Video ID: {video_id}, Summary language: {summary_language}") 122 | 123 | # Validate summary_language parameter 124 | if summary_language not in language_map: 125 | logger.warning(f"Invalid language code: {summary_language}. Defaulting to 'en'.") 126 | summary_language = "en" 127 | 128 | # Preferred languages for transcripts (in order) 129 | preferred_languages = [ 130 | "en", "es", "zh-Hans", "hi", "ar", "pt", "ru", "ja", "fr", "de" 131 | ] 132 | 133 | try: 134 | transcript_data = fetch_transcript(video_id, preferred_languages) 135 | 136 | # Combine transcript text 137 | full_transcript_text = " ".join([entry['text'] for entry in transcript_data]) 138 | logger.info("Transcript text combined successfully.") 139 | 140 | # Determine the target language name from the map, defaulting to English if not found 141 | chosen_language_name = language_map.get(summary_language, "English") 142 | 143 | # Create prompt for OpenAI 144 | prompt = f"Please summarize the following transcript in {chosen_language_name}:\n\n{full_transcript_text}" 145 | logger.info("Prompt created for OpenAI.") 146 | 147 | # Call OpenAI API for summarization 148 | logger.info("Sending summarization request to OpenAI API.") 149 | response = openai.chat.completions.create( 150 | model="gpt-4o-mini", # Check if this model is correct 151 | messages=[ 152 | { 153 | "role": "system", 154 | "content": ( 155 | f"You are an assistant specializing in summarizing transcripts into {chosen_language_name}. " 156 | "Please provide a brief, coherent summary that captures the most important points and ideas from the transcript. " 157 | "Do not include extraneous commentary, background information, or introductions. " 158 | "The summary should be clear, concise, and easy to follow for someone who has not read the full transcript." 159 | ) 160 | }, 161 | { 162 | "role": "user", 163 | "content": prompt 164 | } 165 | ], 166 | temperature=0.8, 167 | max_tokens=3900, 168 | top_p=1.0 169 | ) 170 | logger.info("OpenAI API response received.") 171 | 172 | summary = response.choices[0].message.content.strip() 173 | logger.info("Summary successfully created.") 174 | 175 | return {"summary": summary} 176 | 177 | except HTTPException as http_exc: 178 | logger.error(f"HTTP error occurred: {http_exc.detail}") 179 | raise http_exc 180 | except Exception as e: 181 | logger.error(f"General error occurred: {e}") 182 | raise HTTPException(status_code=400, detail=str(e)) 183 | -------------------------------------------------------------------------------- /src/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn 3 | youtube-transcript-api 4 | openai 5 | python-dotenv --------------------------------------------------------------------------------