├── .gitignore
├── LICENSE
├── README.md
└── src
├── app.py
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110 | .pdm.toml
111 | .pdm-python
112 | .pdm-build/
113 |
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 |
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 |
121 | # SageMath parsed files
122 | *.sage.py
123 |
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 |
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 |
137 | # Rope project settings
138 | .ropeproject
139 |
140 | # mkdocs documentation
141 | /site
142 |
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 |
148 | # Pyre type checker
149 | .pyre/
150 |
151 | # pytype static type analyzer
152 | .pytype/
153 |
154 | # Cython debug symbols
155 | cython_debug/
156 |
157 | # PyCharm
158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | # and can be added to the global gitignore or merged into this file. For a more nuclear
161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Yazılım Academy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # FastAPI YouTube Transcript Summarizer
2 |
3 | This project is a FastAPI-based application that fetches YouTube video transcripts, summarizes them using OpenAI's GPT models (O1 PRO, O1-MINI) and Claude 3.5 SONNET, and returns the summary in a specified language.
4 |
5 | ## Prerequisites
6 |
7 | - Python 3.7 or newer
8 | - OpenAI API key
9 |
10 | ## Installation
11 |
12 | 1. **Clone the repository**:
13 | ```bash
14 | git clone https://github.com/yazilimacademy/sumryza-backend
15 | cd sumryza-backend/src
16 | ```
17 |
18 | 2. **Install the required dependencies**:
19 | ```bash
20 | pip install fastapi uvicorn youtube-transcript-api openai python-dotenv
21 | ```
22 |
23 | 3. **Set up your OpenAI API key**:
24 | - Create a `.env` file in the root of the project.
25 | - Add your OpenAI API key in the following format:
26 |
27 | ```
28 | OPENAI_API_KEY=your_openai_api_key_here
29 | ```
30 |
31 | 4. **Run the application**:
32 | Start the FastAPI server using Uvicorn:
33 | ```bash
34 | uvicorn app:app --reload
35 | ```
36 |
37 | 5. **Access the application**:
38 | Once the server is running, you can access the API documentation at:
39 | ```
40 | http://127.0.0.1:8000/docs
41 | ```
42 |
43 | ## API Endpoints
44 |
45 | ### `GET /transcript`
46 |
47 | Fetches the transcript of a YouTube video and provides a summary in a specified language.
48 |
49 | #### Parameters:
50 | - `video_id` (str): The YouTube video ID (required).
51 | - `summary_language` (str): The preferred language for the summary (optional, default: "tr").
52 |
53 | #### Example Request:
54 | ```
55 | GET http://127.0.0.1:8000/transcript?video_id=VIDEO_ID&summary_language=en
56 | ```
57 |
58 | #### Example Response:
59 | ```json
60 | {
61 | "summary": "This is the summarized content of the video."
62 | }
63 | ```
64 |
65 | ## Notes:
66 | - The app first looks for a transcript in its list of preferred languages and falls back to any other available transcript. The application is intended for learning and gaining experience.
67 | - The summary is generated by the OpenAI API in the specified language.
68 | - The endpoint takes the `video_id` and `summary_language` query parameters and returns a JSON object with a `summary` field.
69 | - The project aims to be an informative, easy-to-read example application. Try it out with informative videos.
70 |
71 | ### A heartfelt thank you to my dear friends who supported me during Yazılım Academy's live broadcasts. 👇
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/src/app.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | from dotenv import load_dotenv
4 | from fastapi import FastAPI, HTTPException, Query
5 | from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
6 | from fastapi.middleware.cors import CORSMiddleware
7 | import openai
8 |
9 | # Logging setup
def setup_logging():
    """Return the shared ``app_logger`` logger, configuring it on first use.

    The logger is set to DEBUG and gets two handlers that share one format
    (timestamp - level - message):

    * a console handler that emits DEBUG and above;
    * a UTF-8 file handler writing INFO and above to ``app.log`` in the
      current working directory.

    The function is idempotent: ``logging.getLogger`` returns the same object
    for the same name, so attaching handlers on every call would make each
    record appear once per call. Handlers are therefore only attached when the
    logger has none yet.

    Returns:
        logging.Logger: the configured ``app_logger`` instance.
    """
    logger = logging.getLogger("app_logger")
    logger.setLevel(logging.DEBUG)

    # Already configured by an earlier call: do not stack duplicate handlers.
    if logger.handlers:
        return logger

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    console_handler.setFormatter(formatter)

    file_handler = logging.FileHandler("app.log", encoding='utf-8')
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)

    logger.addHandler(console_handler)
    logger.addHandler(file_handler)

    return logger
28 |
# Module-level logger used by every function in this file.
logger = setup_logging()

app = FastAPI()

# CORS: any origin may call the API. Credentials (cookies / HTTP auth) are
# deliberately disabled: the FastAPI CORS documentation states that
# allow_origins must not be ["*"] when allow_credentials is True, and the
# endpoint in this file only reads query parameters.
# To support credentialed requests, list the allowed origins explicitly and
# turn allow_credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify your domains
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

logger.info("Application starting and CORS middleware added.")
42 |
# Load variables from a .env file (if one is found) into the environment.
load_dotenv()

# The OpenAI API key is mandatory: fail fast at import time instead of on the
# first request.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    logger.error("OpenAI API key is missing.")
    raise RuntimeError("OpenAI API key is missing.")

# Setting the module-level key is a plain attribute assignment, so the former
# try/except around it guarded a branch that could not realistically run.
openai.api_key = openai_api_key
logger.info("OpenAI client initialized successfully.")
58 |
# Supported summary languages: language code -> English language name.
# The English name (rather than the bare code) is what gets written into the
# prompts, which keeps the instruction unambiguous for the model.
language_map = {
    "en": "English", "tr": "Turkish",
    "es": "Spanish", "fr": "French", "de": "German",
    "hi": "Hindi", "ar": "Arabic",
    "pt": "Portuguese", "ru": "Russian",
    "ja": "Japanese", "zh-Hans": "Simplified Chinese",
}
73 |
74 | # Fetch transcript function
def fetch_transcript(video_id, preferred_languages):
    """Fetch the best available transcript for a YouTube video.

    Selection order:

    1. a manually created transcript in one of ``preferred_languages``;
    2. an auto-generated transcript in one of ``preferred_languages``;
    3. the first transcript the video offers, whatever its language.

    Within steps 1 and 2 the language list is handed to the library as-is, so
    earlier entries take priority over later ones.

    Args:
        video_id: The YouTube video ID.
        preferred_languages: Language codes in descending order of preference.

    Returns:
        The transcript data returned by the selected transcript's ``fetch()``.

    Raises:
        HTTPException: status 400 when the video has no transcript at all, or
            when listing/fetching fails for any other reason (the original
            error text is passed through as ``detail``).
    """
    # NOTE(review): requirements.txt does not pin youtube-transcript-api;
    # confirm the installed release still exposes `list_transcripts`.
    try:
        transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
        logger.info("Transcripts listed successfully.")
        languages = list(preferred_languages)

        # 1) Manually created transcripts only. `find_transcript` would also
        #    return generated ones, which made the generated pass unreachable
        #    and the log message below inaccurate.
        try:
            transcript = transcripts.find_manually_created_transcript(languages)
        except NoTranscriptFound:
            logger.debug(f"No manually created transcript found for: {languages}")
        else:
            transcript_data = transcript.fetch()
            logger.info(f"Manually created transcript found: Language = {transcript.language_code}")
            if transcript_data:
                return transcript_data

        # 2) Auto-generated transcripts in the preferred languages.
        try:
            transcript = transcripts.find_generated_transcript(languages)
        except NoTranscriptFound:
            logger.debug(f"No generated transcript found for: {languages}")
        else:
            transcript_data = transcript.fetch()
            logger.info(f"Generated transcript found: Language = {transcript.language_code}")
            if transcript_data:
                return transcript_data

        # 3) Fall back to whatever transcript the video offers first.
        first_transcript = next(iter(transcripts), None)
        if first_transcript is None:
            logger.error("No transcripts found at all.")
            raise HTTPException(status_code=400, detail="No transcripts found.")
        transcript_data = first_transcript.fetch()
        logger.info(f"Any available transcript found: Language = {first_transcript.language}")
        return transcript_data
    except HTTPException:
        # Already a well-formed API error; re-wrapping it below would turn its
        # detail into the exception's string form.
        raise
    except Exception as e:
        logger.error(f"Error fetching transcript: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
115 |
@app.get("/transcript")
def get_transcript(
    video_id: str = Query(..., description="The YouTube video ID"),
    summary_language: str = Query("tr", description="Preferred summary language (e.g., 'en', 'tr', 'es')")
):
    """Summarize a YouTube video's transcript in the requested language.

    The transcript is fetched with ``fetch_transcript``, joined into a single
    text and sent to the OpenAI chat completions API together with a system
    prompt asking for a concise summary.

    Args:
        video_id: The YouTube video ID (required query parameter).
        summary_language: Language code for the summary. Codes that are not
            keys of ``language_map`` are replaced by ``"en"`` (a warning is
            logged).

    Returns:
        dict: ``{"summary": <summary text>}``.

    Raises:
        HTTPException: status 400 when the transcript cannot be fetched, is
            empty, the model returns no text, or any other error occurs.
    """
    logger.info(f"Transcript request received. Video ID: {video_id}, Summary language: {summary_language}")

    # Validate summary_language parameter
    if summary_language not in language_map:
        logger.warning(f"Invalid language code: {summary_language}. Defaulting to 'en'.")
        summary_language = "en"

    # Preferred languages for transcripts (in order)
    preferred_languages = [
        "en", "es", "zh-Hans", "hi", "ar", "pt", "ru", "ja", "fr", "de"
    ]

    try:
        transcript_data = fetch_transcript(video_id, preferred_languages)

        # Combine transcript text. Entries are dicts with a 'text' key in the
        # library versions this code was written against; attribute access is
        # the fallback for snippet-style objects.
        # NOTE(review): confirm the entry type for the installed
        # youtube-transcript-api release.
        full_transcript_text = " ".join(
            entry["text"] if isinstance(entry, dict) else entry.text
            for entry in transcript_data
        )
        logger.info("Transcript text combined successfully.")

        # Do not spend an API call on a transcript with nothing in it.
        if not full_transcript_text.strip():
            logger.error("Transcript is empty; nothing to summarize.")
            raise HTTPException(status_code=400, detail="Transcript is empty.")

        # summary_language was validated above, so the lookup cannot miss.
        chosen_language_name = language_map[summary_language]

        # Create prompt for OpenAI
        prompt = f"Please summarize the following transcript in {chosen_language_name}:\n\n{full_transcript_text}"
        logger.info("Prompt created for OpenAI.")

        # Call OpenAI API for summarization
        logger.info("Sending summarization request to OpenAI API.")
        response = openai.chat.completions.create(
            model="gpt-4o-mini",  # Check if this model is correct
            messages=[
                {
                    "role": "system",
                    "content": (
                        f"You are an assistant specializing in summarizing transcripts into {chosen_language_name}. "
                        "Please provide a brief, coherent summary that captures the most important points and ideas from the transcript. "
                        "Do not include extraneous commentary, background information, or introductions. "
                        "The summary should be clear, concise, and easy to follow for someone who has not read the full transcript."
                    )
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=0.8,
            max_tokens=3900,
            top_p=1.0
        )
        logger.info("OpenAI API response received.")

        # The message content can be None; calling .strip() on it directly
        # would surface as an opaque AttributeError.
        content = response.choices[0].message.content if response.choices else None
        if not content:
            logger.error("OpenAI API returned no summary text.")
            raise HTTPException(status_code=400, detail="No summary was returned by the model.")

        summary = content.strip()
        logger.info("Summary successfully created.")

        return {"summary": summary}

    except HTTPException as http_exc:
        logger.error(f"HTTP error occurred: {http_exc.detail}")
        raise
    except Exception as e:
        # NOTE(review): upstream/unexpected failures are reported as 400 to
        # stay compatible with existing clients; a 5xx status would describe
        # them more accurately.
        logger.exception(f"General error occurred: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
183 |
--------------------------------------------------------------------------------
/src/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | uvicorn
3 | youtube-transcript-api
4 | openai
5 | python-dotenv
--------------------------------------------------------------------------------