├── .gitignore
├── LICENSE
├── README.md
└── src
    ├── app.py
    └── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110 | .pdm.toml
111 | .pdm-python
112 | .pdm-build/
113 | 
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 | 
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 | 
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 | 
137 | # Rope project settings
138 | .ropeproject
139 | 
140 | # mkdocs documentation
141 | /site
142 | 
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 | 
148 | # Pyre type checker
149 | .pyre/
150 | 
151 | # pytype static type analyzer
152 | .pytype/
153 | 
154 | # Cython debug symbols
155 | cython_debug/
156 | 
157 | # PyCharm
158 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
161 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Yazılım Academy
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # FastAPI YouTube Transcript Summarizer
 2 | 
 3 | This project is a FastAPI-based application that fetches YouTube video transcripts, summarizes them with OpenAI's `gpt-4o-mini` chat model, and returns the summary in a specified language.
 4 | 
 5 | ## Prerequisites
 6 | 
 7 | - Python 3.7 or newer
 8 | - OpenAI API key
 9 | 
10 | ## Installation
11 | 
12 | 1. **Clone the repository**:
13 |     ```bash
14 |     git clone https://github.com/yazilimacademy/sumryza-backend
15 |     cd sumryza-backend/src
16 |     ```
17 | 
18 | 2. **Install the required dependencies**:
19 |     ```bash
20 |     pip install fastapi uvicorn youtube-transcript-api openai python-dotenv
21 |     ```
22 | 
23 | 3. **Set up your OpenAI API key**:
24 |     - Create a `.env` file in the root of the project.
25 |     - Add your OpenAI API key in the following format:
26 |     
27 |     ```
28 |     OPENAI_API_KEY=your_openai_api_key_here
29 |     ```
30 | 
31 | 4. **Run the application**:
32 |     Start the FastAPI server using Uvicorn:
33 |     ```bash
34 |     uvicorn app:app --reload
35 |     ```
36 | 
37 | 5. **Access the application**:
38 |     Once the server is running, you can access the API documentation at:
39 |     ```
40 |     http://127.0.0.1:8000/docs
41 |     ```
42 | 
43 | ## API Endpoints
44 | 
45 | ### `GET /transcript`
46 | 
47 | Fetches the transcript of a YouTube video and provides a summary in a specified language.
48 | 
49 | #### Parameters:
50 | - `video_id` (str): The YouTube video ID (required).
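51 | - `summary_language` (str): The preferred language for the summary (optional, default: "tr"). Supported codes: `en`, `tr`, `es`, `fr`, `de`, `hi`, `ar`, `pt`, `ru`, `ja`, `zh-Hans`; any other value falls back to `en`.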
52 | 
53 | #### Example Request:
54 | ```
55 | GET http://127.0.0.1:8000/transcript?video_id=VIDEO_ID&summary_language=en
56 | ```
57 | 
58 | #### Example Response:
59 | ```json
60 | {
61 |     "summary": "This is the summarized content of the video."
62 | }
63 | ```
64 | 
65 | ## Notes:
66 | - The app first looks for a transcript in one of its preferred languages and falls back to any available transcript if none is found. The project was built for learning and hands-on practice.
67 | - The summary is generated in the specified language using the OpenAI API.
68 | - The endpoint takes the `video_id` and `summary_language` parameters and returns the summary as a JSON response.
69 | - Questions about the project are welcome in this repository; the application is meant to be informative and easy to read. For testing, informative or educational videos work well.
70 | 
71 | ### A heartfelt thank you to my dear friends who supported me during Yazılım Academy's live broadcasts. 👇
72 | 
73 | <a href="https://github.com/altudev"><img width="60px" alt="altudev" src="https://github.com/altudev.png"/></a>
74 | <a href="https://github.com/HikmetMelikk"><img width="60px" alt="HikmetMelikk" src="https://github.com/HikmetMelikk.png"/></a>
75 | <a href="https://github.com/merveeksi"><img width="60px" alt="merveeksi" src="https://github.com/merveeksi.png"/></a>
76 | <a href="https://github.com/KardelRuveyda"><img width="60px" alt="KardelRuveyda" src="https://github.com/KardelRuveyda.png"/></a>
77 | <a href="https://github.com/Taiizor"><img width="60px" alt="Taiizor" src="https://github.com/Taiizor.png"/></a>
78 | <a href="https://github.com/k-celal"><img width="60px" alt="k-celal" src="https://github.com/k-celal.png"/></a>
79 | <a href="https://github.com/serkutYILDIRIM"><img width="60px" alt="serkutYILDIRIM" src="https://github.com/serkutYILDIRIM.png"/></a>
80 | <a href="https://github.com/nurullahnamal"><img width="60px" alt="nurullahnamal" src="https://github.com/nurullahnamal.png"/></a>
81 | <a href="https://github.com/MSimsek07"><img width="60px" alt="MSimsek07" src="https://github.com/MSimsek07.png"/></a>
82 | <a href="https://github.com/alihangudenoglu"><img width="60px" alt="alihangudenoglu" src="https://github.com/alihangudenoglu.png"/></a>
83 | <a href="https://github.com/iparzival0"><img width="60px" alt="EmirhanKara" src="https://github.com/iparzival0.png"/></a>
84 | <a href="https://github.com/ladrons"><img width="60px" alt="ladrons" src="https://github.com/ladrons.png"/></a>
85 | <a href="https://github.com/EmreAka"><img width="60px" alt="EmreAka" src="https://github.com/EmreAka.png"/></a>
86 | 


--------------------------------------------------------------------------------
/src/app.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | from dotenv import load_dotenv
  4 | from fastapi import FastAPI, HTTPException, Query
  5 | from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
  6 | from fastapi.middleware.cors import CORSMiddleware
  7 | import openai
  8 | 
  9 | # Logging setup
 10 | def setup_logging():
 11 |     logger = logging.getLogger("app_logger")
 12 |     logger.setLevel(logging.DEBUG)
 13 | 
 14 |     console_handler = logging.StreamHandler()
 15 |     console_handler.setLevel(logging.DEBUG)
 16 |     console_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 17 |     console_handler.setFormatter(console_format)
 18 | 
 19 |     file_handler = logging.FileHandler("app.log", encoding='utf-8')
 20 |     file_handler.setLevel(logging.INFO)
 21 |     file_format = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 22 |     file_handler.setFormatter(file_format)
 23 | 
 24 |     logger.addHandler(console_handler)
 25 |     logger.addHandler(file_handler)
 26 | 
 27 |     return logger
 28 | 
 29 | logger = setup_logging()
 30 | 
 31 | app = FastAPI()
 32 | 
 33 | app.add_middleware(
 34 |     CORSMiddleware,
 35 |     allow_origins=["*"],  # In production, specify your domains
 36 |     allow_credentials=True,
 37 |     allow_methods=["*"],
 38 |     allow_headers=["*"],
 39 | )
 40 | 
 41 | logger.info("Application starting and CORS middleware added.")
 42 | 
 43 | # Load .env file
 44 | load_dotenv()
 45 | 
 46 | # Load OpenAI API key from environment variable
 47 | openai_api_key = os.getenv("OPENAI_API_KEY")
 48 | if not openai_api_key:
 49 |     logger.error("OpenAI API key is missing.")
 50 |     raise RuntimeError("OpenAI API key is missing.")
 51 | 
 52 | try:
 53 |     openai.api_key = openai_api_key
 54 |     logger.info("OpenAI client initialized successfully.")
 55 | except Exception as e:
 56 |     logger.error(f"Failed to initialize OpenAI client: {e}")
 57 |     raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
 58 | 
 59 | # Map language codes to English language names for better clarity in prompts
 60 | language_map = {
 61 |     "en": "English",
 62 |     "tr": "Turkish",
 63 |     "es": "Spanish",
 64 |     "fr": "French",
 65 |     "de": "German",
 66 |     "hi": "Hindi",
 67 |     "ar": "Arabic",
 68 |     "pt": "Portuguese",
 69 |     "ru": "Russian",
 70 |     "ja": "Japanese",
 71 |     "zh-Hans": "Simplified Chinese"
 72 | }
 73 | 
 74 | # Fetch transcript function
 75 | def fetch_transcript(video_id, preferred_languages):
 76 |     try:
 77 |         transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
 78 |         logger.info("Transcripts listed successfully.")
 79 |         transcript_data = None
 80 | 
 81 |         # Try to find a manually created transcript in preferred languages
 82 |         for lang in preferred_languages:
 83 |             try:
 84 |                 transcript_data = transcripts.find_transcript([lang]).fetch()
 85 |                 logger.info(f"Manually created transcript found: Language = {lang}")
 86 |                 break
 87 |             except NoTranscriptFound:
 88 |                 logger.debug(f"No manually created transcript found for: {lang}")
 89 |                 continue
 90 | 
 91 |         # Try to find a generated transcript in preferred languages
 92 |         if not transcript_data:
 93 |             for lang in preferred_languages:
 94 |                 try:
 95 |                     transcript_data = transcripts.find_generated_transcript([lang]).fetch()
 96 |                     logger.info(f"Generated transcript found: Language = {lang}")
 97 |                     break
 98 |                 except NoTranscriptFound:
 99 |                     logger.debug(f"No generated transcript found for: {lang}")
100 |                     continue
101 | 
102 |         # Fallback to any available transcript if none of the above worked
103 |         if not transcript_data:
104 |             try:
105 |                 first_transcript = next(iter(transcripts))
106 |                 transcript_data = first_transcript.fetch()
107 |                 logger.info(f"Any available transcript found: Language = {first_transcript.language}")
108 |             except StopIteration:
109 |                 logger.error("No transcripts found at all.")
110 |                 raise HTTPException(status_code=400, detail="No transcripts found.")
111 |         return transcript_data
112 |     except Exception as e:
113 |         logger.error(f"Error fetching transcript: {e}")
114 |         raise HTTPException(status_code=400, detail=str(e))
115 | 
116 | @app.get("/transcript")
117 | def get_transcript(
118 |     video_id: str = Query(..., description="The YouTube video ID"),
119 |     summary_language: str = Query("tr", description="Preferred summary language (e.g., 'en', 'tr', 'es')")
120 | ):
121 |     logger.info(f"Transcript request received. Video ID: {video_id}, Summary language: {summary_language}")
122 | 
123 |     # Validate summary_language parameter
124 |     if summary_language not in language_map:
125 |         logger.warning(f"Invalid language code: {summary_language}. Defaulting to 'en'.")
126 |         summary_language = "en"
127 | 
128 |     # Preferred languages for transcripts (in order)
129 |     preferred_languages = [
130 |         "en", "es", "zh-Hans", "hi", "ar", "pt", "ru", "ja", "fr", "de"
131 |     ]
132 | 
133 |     try:
134 |         transcript_data = fetch_transcript(video_id, preferred_languages)
135 | 
136 |         # Combine transcript text
137 |         full_transcript_text = " ".join([entry['text'] for entry in transcript_data])
138 |         logger.info("Transcript text combined successfully.")
139 | 
140 |         # Determine the target language name from the map, defaulting to English if not found
141 |         chosen_language_name = language_map.get(summary_language, "English")
142 | 
143 |         # Create prompt for OpenAI
144 |         prompt = f"Please summarize the following transcript in {chosen_language_name}:\n\n{full_transcript_text}"
145 |         logger.info("Prompt created for OpenAI.")
146 | 
147 |         # Call OpenAI API for summarization
148 |         logger.info("Sending summarization request to OpenAI API.")
149 |         response = openai.chat.completions.create(
150 |             model="gpt-4o-mini", # Check if this model is correct
151 |             messages=[
152 |                 {
153 |                     "role": "system",
154 |                     "content": (
155 |                         f"You are an assistant specializing in summarizing transcripts into {chosen_language_name}. "
156 |                         "Please provide a brief, coherent summary that captures the most important points and ideas from the transcript. "
157 |                         "Do not include extraneous commentary, background information, or introductions. "
158 |                         "The summary should be clear, concise, and easy to follow for someone who has not read the full transcript."
159 |                     )
160 |                 },
161 |                 {
162 |                     "role": "user",
163 |                     "content": prompt
164 |                 }
165 |             ],
166 |             temperature=0.8,
167 |             max_tokens=3900,
168 |             top_p=1.0
169 |         )
170 |         logger.info("OpenAI API response received.")
171 | 
172 |         summary = response.choices[0].message.content.strip()
173 |         logger.info("Summary successfully created.")
174 | 
175 |         return {"summary": summary}
176 | 
177 |     except HTTPException as http_exc:
178 |         logger.error(f"HTTP error occurred: {http_exc.detail}")
179 |         raise http_exc
180 |     except Exception as e:
181 |         logger.error(f"General error occurred: {e}")
182 |         raise HTTPException(status_code=400, detail=str(e))
183 | 


--------------------------------------------------------------------------------
/src/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | uvicorn
3 | youtube-transcript-api
4 | openai
5 | python-dotenv 


--------------------------------------------------------------------------------