├── .gitignore
├── Dockerfile
├── README.md
├── client.py
├── env.example
├── main.py
├── pyproject.toml
├── requirements.txt
├── server.py
└── smithery.yaml


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python-generated files
 2 | __pycache__/
 3 | *.py[oc]
 4 | build/
 5 | dist/
 6 | wheels/
 7 | *.egg-info
 8 | .python-version
 9 | uv.lock
10 | 
11 | # Virtual environments
12 | .venv
13 | logs/
14 | .env
15 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
 2 | FROM python:3.12-slim
 3 | 
 4 | # Set a working directory
 5 | WORKDIR /app
 6 | 
 7 | # Copy all files to the container
 8 | COPY . .
 9 | 
10 | # Install Python dependencies
11 | RUN pip install --no-cache-dir google-api-python-client mcp[cli] python-dotenv youtube-transcript-api
12 | 
13 | # Set a placeholder for the API key (overridden at runtime, e.g. by the startCommand config or `docker run -e`)
14 | ENV YOUTUBE_API_KEY=your_youtube_api_key
15 | 
16 | # Command to run the MCP server
17 | CMD ["python", "server.py"]
18 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # py-mcp-youtube-toolbox
  2 | [![smithery badge](https://smithery.ai/badge/@jikime/py-mcp-youtube-toolbox)](https://smithery.ai/server/@jikime/py-mcp-youtube-toolbox) ![](https://badge.mcpx.dev?type=server 'MCP Server') ![Version](https://img.shields.io/badge/version-1.0.0-green) ![License](https://img.shields.io/badge/license-MIT-blue)
  3 | 
  4 | An MCP server that provides AI assistants with powerful tools to interact with YouTube, including video searching, transcript extraction, comment retrieval, and more.
  5 | 
  6 | ## Overview
  7 | 
  8 | py-mcp-youtube-toolbox provides the following YouTube-related functionalities:
  9 | 
 10 | - Search YouTube videos with advanced filtering options
 11 | - Get detailed information about videos and channels
 12 | - Retrieve video comments with sorting options
 13 | - Extract video transcripts and captions in multiple languages
 14 | - Find related videos for a given video
 15 | - Get trending videos by region
 16 | - Generate summaries of video content based on transcripts
 17 | - Advanced transcript analysis with filtering, searching, and multi-video capabilities
 18 | 
 19 | ## Table of Contents
 20 | 
 21 | - [Prerequisites](#prerequisites)
 22 | - [Installation](#installation)
 23 | - [Configure MCP Settings](#configure-mcp-settings)
 24 | - [Tools Documentation](#tools-documentation)
 25 |   - [Video Tools](#video-tools)
 26 |   - [Channel Tools](#channel-tools)
 27 |   - [Transcript Tools](#transcript-tools)
 28 |   - [Prompt Tools](#prompt-tools)
 29 |   - [Resource Tools](#resource-tools)
 30 | - [Development](#development)
 31 | - [License](#license)
 32 | 
 33 | ## Prerequisites
 34 | 1. **Python**: Install Python 3.12 or higher
 35 | 2. **YouTube API Key**:
 36 |    - Go to [Google Cloud Console](https://console.cloud.google.com/)
 37 |    - Create a new project or select an existing one
 38 |    - Enable the YouTube Data API v3:
 39 |      1. Go to "APIs & Services" > "Library"
 40 |      2. Search for and enable "YouTube Data API v3"
 41 |    - Create credentials:
 42 |      1. Go to "APIs & Services" > "Credentials"
 43 |      2. Click "Create Credentials" > "API key"
 44 |      3. Note down your API key
 45 | 
 46 | ## Installation
 47 | #### Git Clone
 48 | ```bash
 49 | git clone https://github.com/jikime/py-mcp-youtube-toolbox.git
 50 | cd py-mcp-youtube-toolbox
 51 | ```
 52 | 
 53 | #### Configuration 
 54 | 1. Install UV package manager:
 55 | ```bash
 56 | curl -LsSf https://astral.sh/uv/install.sh | sh
 57 | ```
 58 | 
 59 | 2. Create and activate virtual environment:
 60 | ```bash
 61 | uv venv -p 3.12
 62 | source .venv/bin/activate  # On MacOS/Linux
 63 | # or
 64 | .venv\Scripts\activate  # On Windows
 65 | ```
 66 | 
 67 | 3. Install dependencies:
 68 | ```bash
 69 | uv pip install -r requirements.txt
 70 | ```
 71 | 
 72 | 4. Environment variables:
 73 | ```bash
 74 | cp env.example .env
 75 | vi .env
 76 | # Update with your YouTube API key
 77 | YOUTUBE_API_KEY=your_youtube_api_key
 78 | ```
 79 | 
 80 | #### Using Docker
 81 | 
 82 | 1. Build the Docker image:
 83 | ```bash
 84 | docker build -t py-mcp-youtube-toolbox .
 85 | ```
 86 | 
 87 | 2. Run the container:
 88 | ```bash
 89 | docker run -e YOUTUBE_API_KEY=your_youtube_api_key py-mcp-youtube-toolbox
 90 | ```
 91 | 
 92 | #### Using Local
 93 | 
 94 | 1. Run the server:
 95 | ```bash
 96 | mcp run server.py
 97 | ```
 98 | 
 99 | 2. Run the MCP Inspector:
100 | ```bash
101 | mcp dev server.py
102 | ```
103 | 
104 | ## Configure MCP Settings
105 | Add the server configuration to your MCP settings file:
106 | 
107 | #### Claude desktop app 
108 | 1. To install automatically via [Smithery](https://smithery.ai/server/@jikime/py-mcp-youtube-toolbox):
109 | 
110 | ```bash
111 | npx -y @smithery/cli install @jikime/py-mcp-youtube-toolbox --client claude
112 | ```
113 | 
114 | 2. To install manually, open the Claude desktop configuration file (macOS path shown):
115 | `~/Library/Application Support/Claude/claude_desktop_config.json`
116 | 
117 | Add this to the `mcpServers` object:
118 | ```json
119 | {
120 |   "mcpServers": {
121 |     "YouTube Toolbox": {
122 |       "command": "/path/to/bin/uv",
123 |       "args": [
124 |         "--directory",
125 |         "/path/to/py-mcp-youtube-toolbox",
126 |         "run",
127 |         "server.py"
128 |       ],
129 |       "env": {
130 |         "YOUTUBE_API_KEY": "your_youtube_api_key"
131 |       }
132 |     }
133 |   }
134 | }
135 | ```
136 | 
137 | #### Cursor IDE 
138 | open `~/.cursor/mcp.json`
139 | 
140 | Add this to the `mcpServers` object:
141 | ```json
142 | {
143 |   "mcpServers": {
144 |     "YouTube Toolbox": {
145 |       "command": "/path/to/bin/uv",
146 |       "args": [
147 |         "--directory",
148 |         "/path/to/py-mcp-youtube-toolbox",
149 |         "run",
150 |         "server.py"
151 |       ],
152 |       "env": {
153 |         "YOUTUBE_API_KEY": "your_youtube_api_key"
154 |       }
155 |     }
156 |   }
157 | }
158 | ```
159 | 
160 | #### for Docker
161 | ```json
162 | {
163 |   "mcpServers": {
164 |     "YouTube Toolbox": {
165 |       "command": "docker",
166 |       "args": [
167 |         "run",
168 |         "-i",
169 |         "--rm",
170 |         "-e", "YOUTUBE_API_KEY=your_youtube_api_key",
171 |         "py-mcp-youtube-toolbox"
172 |       ]
173 |     }
174 |   }
175 | }
176 | ```
177 | 
178 | ## Tools Documentation
179 | 
180 | ### Video Tools
181 | 
182 | - `search_videos`: Search for YouTube videos with advanced filtering options (channel, duration, region, etc.)
183 | - `get_video_details`: Get detailed information about a specific YouTube video (title, channel, views, likes, etc.)
184 | - `get_video_comments`: Retrieve comments from a YouTube video with sorting options
185 | - `get_related_videos`: Find videos related to a specific YouTube video
186 | - `get_trending_videos`: Get trending videos on YouTube by region
187 | 
188 | ### Channel Tools
189 | 
190 | - `get_channel_details`: Get detailed information about a YouTube channel (name, subscribers, views, etc.)
191 | 
192 | ### Transcript Tools
193 | 
194 | - `get_video_transcript`: Extract transcripts/captions from YouTube videos in specified languages
195 | - `get_video_enhanced_transcript`: Advanced transcript extraction with filtering, search, and multi-video capabilities
196 | 
197 | ### Prompt Tools
198 | 
199 | - `transcript_summary`: Generate summaries of YouTube video content based on transcripts with customizable options
200 | 
201 | ### Resource Tools
202 | 
203 | - `youtube://available-youtube-tools`: Get a list of all available YouTube tools
204 | - `youtube://video/{video_id}`: Get detailed information about a specific video
205 | - `youtube://channel/{channel_id}`: Get information about a specific channel
206 | - `youtube://transcript/{video_id}?language={language}`: Get transcript for a specific video
207 | 
208 | ## Development
209 | 
210 | For local testing, you can use the included client script:
211 | 
212 | ```bash
213 | # Example: Search videos
214 | uv run client.py search_videos query="MCP" max_results=5
215 | 
216 | # Example: Get video details
217 | uv run client.py get_video_details video_id=zRgAEIoZEVQ
218 | 
219 | # Example: Get channel details
220 | uv run client.py get_channel_details channel_id=UCRpOIr-NJpK9S483ge20Pgw
221 | 
222 | # Example: Get video comments
223 | uv run client.py get_video_comments video_id=zRgAEIoZEVQ max_results=10 order=time
224 | 
225 | # Example: Get video transcript
226 | uv run client.py get_video_transcript video_id=zRgAEIoZEVQ language=ko
227 | 
228 | # Example: Get related videos
229 | uv run client.py get_related_videos video_id=zRgAEIoZEVQ max_results=5
230 | 
231 | # Example: Get trending videos
232 | uv run client.py get_trending_videos region_code=KR max_results=10
233 | 
234 | # Example: Advanced transcript extraction
235 | uv run client.py get_video_enhanced_transcript video_ids=zRgAEIoZEVQ language=ko format=timestamped include_metadata=true start_time=100 end_time=200 query=에이전트 case_sensitive=true segment_method=equal segment_count=2
236 | 
237 | # Example: 
238 | ```
239 | 
240 | ## License
241 | 
242 | MIT License
243 | 


--------------------------------------------------------------------------------
/client.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import sys
  3 | import json
  4 | import logging
  5 | from mcp import ClientSession, StdioServerParameters
  6 | from mcp.client.stdio import stdio_client
  7 | from typing import Any, Dict, List, Optional
  8 | 
  9 | # --- Logging configuration ---
 10 | logging.basicConfig(
 11 |     level=logging.DEBUG,
 12 |     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 13 | )
 14 | logger = logging.getLogger(__name__)
 15 | 
 16 | # Server script filename (must match the actual server file name)
 17 | SERVER_SCRIPT_FILENAME = "server.py"
 18 | 
 19 | async def run_tool_call(tool_name: str, arguments: Dict[str, Any]) -> None:
 20 |     """지정된 Tool 이름과 파라미터로 MCP 서버의 Tool을 호출하고 결과를 출력합니다."""
 21 | 
 22 |     # 서버 실행 설정: 현재 디렉토리의 SERVER_SCRIPT_FILENAME을 python으로 실행
 23 |     server_params = StdioServerParameters(
 24 |         command=sys.executable, # 현재 사용 중인 파이썬 인터프리터 사용
 25 |         args=[SERVER_SCRIPT_FILENAME],
 26 |         env=None, # 필요시 환경 변수 전달 가능
 27 |     )
 28 | 
 29 |     print(f"--- YouTube MCP 서버 호출 요청 ---")
 30 |     print(f"   이름: {tool_name}")
 31 |     print(f"   Arguments: {json.dumps(arguments, indent=2, ensure_ascii=False)}") # 인자 예쁘게 출력
 32 | 
 33 |     try:
 34 |         # stdio_client를 사용하여 서버 프로세스 시작 및 연결
 35 |         async with stdio_client(server_params) as (read, write):
 36 |             # ClientSession 생성
 37 |             async with ClientSession(read, write) as session:
 38 |                 # 서버와 초기 핸드셰이크 수행
 39 |                 await session.initialize()
 40 |                 logger.info("YouTube MCP 서버와 연결 초기화 완료.")
 41 | 
 42 |                 # (선택 사항) 사용 가능한 리소스 및 툴 목록 확인
 43 |                 try:
 44 |                     # 사용 가능한 리소스 확인
 45 |                     resources_info = await session.list_resources()
 46 |                     available_resources = [resource.name for resource in resources_info.resources] if hasattr(resources_info, 'resources') else []
 47 |                     available_resource_uris = [resource.uri for resource in resources_info.resources] if hasattr(resources_info, 'resources') else []
 48 |                     logger.info(f"서버에서 사용 가능한 Resources: {available_resources}")
 49 |                     logger.info(f"서버에서 사용 가능한 Resource URIs: {available_resource_uris}")
 50 | 
 51 |                     # 사용 가능한 툴 목록 확인
 52 |                     tools_info = await session.list_tools()
 53 |                     available_tools = [tool.name for tool in tools_info.tools] if hasattr(tools_info, 'tools') else []
 54 |                     logger.info(f"서버에서 사용 가능한 Tools: {available_tools}")
 55 | 
 56 |                     # 사용 가능한 프롬프트 확인
 57 |                     prompts_info = await session.list_prompts()
 58 |                     available_prompts = [prompt.name for prompt in prompts_info.prompts] if hasattr(prompts_info, 'prompts') else []
 59 |                     logger.info(f"서버에서 사용 가능한 Prompts: {available_prompts}")
 60 | 
 61 |                     # 지정된 이름이 어떤 타입인지 확인
 62 |                     is_tool = tool_name in available_tools
 63 |                     is_prompt = tool_name in available_prompts
 64 |                     is_resource = tool_name in available_resources
 65 |                     is_resource_uri = tool_name.startswith("youtube://")
 66 |                     
 67 |                     # 아무데도 없으면 경고
 68 |                     if not (is_tool or is_prompt or is_resource or is_resource_uri):
 69 |                         logger.warning(f"경고: 요청된 '{tool_name}'이 서버의 사용 가능 목록에 없습니다. 호출을 시도합니다.")
 70 | 
 71 |                 except Exception as e:
 72 |                     logger.warning(f"사용 가능한 리소스/Tool/Prompt 목록 조회 중 오류 발생: {e}")
 73 |                     is_tool = True  # 기본적으로 tool로 시도
 74 |                     is_prompt = False
 75 |                     is_resource = False
 76 |                     is_resource_uri = tool_name.startswith("youtube://")
 77 | 
 78 |                 # 결과 변수 초기화
 79 |                 result = None
 80 | 
 81 |                 # Tool 또는 Resource 호출
 82 |                 if is_resource_uri:
 83 |                     # Resource URI 직접 호출
 84 |                     logger.info(f"'{tool_name}' Resource URI 호출 중...")
 85 |                     result = await session.fetch_resource(tool_name, query_params=arguments)
 86 |                 elif is_tool:
 87 |                     # Tool 호출
 88 |                     logger.info(f"'{tool_name}' Tool 호출 중...")
 89 |                     result = await session.call_tool(tool_name, arguments=arguments)
 90 |                 elif is_prompt:
 91 |                     # Prompt 호출
 92 |                     logger.info(f"'{tool_name}' Prompt 호출 중...")
 93 |                     result = await session.call_prompt(tool_name, arguments=arguments)
 94 |                 else:
 95 |                     # 이름으로 Resource 호출 시도
 96 |                     logger.info(f"'{tool_name}' Resource 이름으로 호출 시도 중...")
 97 |                     try:
 98 |                         # 이름에 맞는 리소스 URI 찾기
 99 |                         matching_resource = next((r for r in resources_info.resources if r.name == tool_name), None)
100 |                         if matching_resource:
101 |                             result = await session.fetch_resource(matching_resource.uri, query_params=arguments)
102 |                         else:
103 |                             # 마지막 수단으로 툴 호출 시도
104 |                             result = await session.call_tool(tool_name, arguments=arguments)
105 |                     except Exception as resource_error:
106 |                         logger.error(f"Resource 호출 실패, tool로 시도: {resource_error}")
107 |                         result = await session.call_tool(tool_name, arguments=arguments)
108 | 
109 |                 logger.debug(f"호출 원시 결과: {result}") # 디버깅 시 상세 결과 확인
110 | 
111 |                 # 결과 출력
112 |                 print("\n--- 호출 결과 ---")
113 |                 if hasattr(result, 'content') and result.content:
114 |                     # 결과 내용이 여러 개일 수 있으므로 반복 처리
115 |                     for content_item in result.content:
116 |                         if hasattr(content_item, 'text'):
117 |                             # JSON 형식의 텍스트일 경우 파싱하여 예쁘게 출력 시도
118 |                             try:
119 |                                 parsed_json = json.loads(content_item.text)
120 |                                 print(json.dumps(parsed_json, indent=2, ensure_ascii=False))
121 |                             except json.JSONDecodeError:
122 |                                 # JSON 파싱 실패 시 원본 텍스트 출력
123 |                                 print(content_item.text)
124 |                         else:
125 |                             # text 속성이 없는 경우 객체 자체 출력
126 |                             print(content_item)
127 |                 elif hasattr(result, 'isError') and result.isError:
128 |                     print("오류 응답:")
129 |                     # isError가 True일 때 content가 있을 수 있음
130 |                     if hasattr(result, 'content') and result.content:
131 |                         for content_item in result.content:
132 |                             if hasattr(content_item, 'text'):
133 |                                 print(content_item.text)
134 |                             else:
135 |                                 print(content_item)
136 |                     else: # 오류지만 content가 없는 경우
137 |                         print("오류가 발생했으나 상세 내용이 없습니다.")
138 |                 elif hasattr(result, 'contents'):  # Resource 결과 형식
139 |                     # Resource 형식의 결과 처리
140 |                     for content in result.contents:
141 |                         if hasattr(content, 'text'):
142 |                             # JSON 형식 텍스트일 경우 파싱하여 예쁘게 출력 시도
143 |                             try:
144 |                                 parsed_json = json.loads(content.text)
145 |                                 print(json.dumps(parsed_json, indent=2, ensure_ascii=False))
146 |                             except json.JSONDecodeError:
147 |                                 # JSON 파싱 실패 시 원본 텍스트 출력
148 |                                 print(content.text)
149 |                         else:
150 |                             # text 속성이 없는 경우 객체 자체 출력
151 |                             print(content)
152 |                 else:
153 |                     # 예상치 못한 응답 형식
154 |                     print("예상치 못한 응답 형식:")
155 |                     print(result)
156 | 
157 |     except Exception as e:
158 |         print(f"\n--- 클라이언트 오류 발생 ---")
159 |         print(f"   오류 유형: {type(e).__name__}")
160 |         print(f"   오류 메시지: {e}")
161 | 
162 | if __name__ == "__main__":
163 |     # 터미널 인자 파싱
164 |     if len(sys.argv) < 2:
165 |         print(f"사용법: uv run client.py <name> [param1=value1] [param2=value2] ...")
166 |         print("\n<name>은 다음 중 하나일 수 있습니다:")
167 |         print("  1. Tool 이름")
168 |         print("  2. Prompt 이름")
169 |         print("  3. Resource 이름 또는 URI")
170 |         print("\n사용 가능한 Tool 이름:")
171 |         print("  search_videos, get_video_details, get_channel_details,")
172 |         print("  get_video_comments, get_video_transcript, get_related_videos, get_trending_videos, get_video_enhanced_transcript")
173 |         print("\n사용 가능한 Prompt 이름:")
174 |         print("  transcript_summary")
175 |         print("\n사용 가능한 Resource URI 예시:")
176 |         print("  youtube://available-youtube-tools")
177 |         print("  youtube://video/dQw4w9WgXcQ")
178 |         print("  youtube://channel/UC_x5XG1OV2P6uZZ5FSM9Ttw")
179 |         print("  youtube://transcript/dQw4w9WgXcQ?language=ko")
180 |         print("\n파라미터 형식:")
181 |         print("  key=value (띄어쓰기 없이)")
182 |         print("\n예시:")
183 |         print(f"  uv run client.py search_videos query=MCP max_results=5")
184 |         print(f"  uv run client.py get_video_details video_id=zRgAEIoZEVQ")
185 |         print(f"  uv run client.py get_channel_details channel_id=UCRpOIr-NJpK9S483ge20Pgw")
186 |         print(f"  uv run client.py get_video_comments video_id=zRgAEIoZEVQ max_results=10 order=time")
187 |         print(f"  uv run client.py get_video_transcript video_id=zRgAEIoZEVQ language=ko")
188 |         print(f"  uv run client.py get_related_videos video_id=zRgAEIoZEVQ max_results=5")
189 |         print(f"  uv run client.py get_trending_videos region_code=ko max_results=10")
190 |         print(f"  uv run client.py get_video_enhanced_transcript video_ids=zRgAEIoZEVQ language=ko format=timestamped include_metadata=true start_time=100 end_time=200 query=에이전트 case_sensitive=true segment_method=equal segment_count=2")
191 | 
192 |         sys.exit(1)
193 | 
194 |     tool_name = sys.argv[1]
195 |     arguments: Dict[str, Any] = {}
196 | 
197 |     # 추가 인자 파싱 (key=value)
198 |     if len(sys.argv) > 2:
199 |         for arg in sys.argv[2:]:
200 |             if "=" in arg:
201 |                 key, value = arg.split("=", 1)
202 |                 key = key.strip()
203 |                 value = value.strip()
204 | 
205 |                 # 배열 형태의 파라미터 처리 (쉼표로 구분)
206 |                 array_param_keys = ['video_ids']  # enhanced_transcript 도구는 video_ids를 리스트로 받음
207 |                 
208 |                 # 계층적 파라미터 처리 (예: filters.timeRange.start)
209 |                 if '.' in key:
210 |                     parts = key.split('.')
211 |                     # 첫 번째 계층이 없으면 생성
212 |                     if parts[0] not in arguments:
213 |                         arguments[parts[0]] = {}
214 |                     
215 |                     # 두 번째 계층이 없으면 생성
216 |                     current = arguments[parts[0]]
217 |                     for i in range(1, len(parts) - 1):
218 |                         if parts[i] not in current:
219 |                             current[parts[i]] = {}
220 |                         current = current[parts[i]]
221 |                     
222 |                     # 값 설정 (타입 변환 적용)
223 |                     final_key = parts[-1]
224 |                     if value.isdigit():
225 |                         current[final_key] = int(value)
226 |                     elif value.lower() in ['true', 'false']:
227 |                         current[final_key] = value.lower() == 'true'
228 |                     else:
229 |                         current[final_key] = value
230 |                 # 배열 파라미터 처리
231 |                 elif key in array_param_keys:
232 |                     arguments[key] = value.split(',')
233 |                 # 숫자형 파라미터 처리
234 |                 elif key in ['max_results'] and value.isdigit():
235 |                     arguments[key] = int(value)
236 |                 # 불리언 파라미터 처리
237 |                 elif key in ['include_replies', 'include_metadata'] and value.lower() in ['true', 'false']:
238 |                     arguments[key] = value.lower() == 'true'
239 |                 # 그 외 일반 문자열 파라미터
240 |                 else:
241 |                     arguments[key] = value
242 |             else:
243 |                 print(f"경고: 잘못된 파라미터 형식 무시됨 - '{arg}'. 'key=value' 형식을 사용하세요.")
244 | 
245 |     # 비동기 함수 실행
246 |     try:
247 |         asyncio.run(run_tool_call(tool_name, arguments))
248 |     except KeyboardInterrupt:
249 |         logger.info("사용자에 의해 클라이언트 실행이 중단되었습니다.")
250 | 


--------------------------------------------------------------------------------
/env.example:
--------------------------------------------------------------------------------
1 | YOUTUBE_API_KEY=your_youtube_api_key


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | def main():
2 |     """Print the project greeting to standard output."""
3 |     print("Hello from py-mcp-youtube-toolbox!")
4 | 
5 | 
6 | if __name__ == "__main__":
7 |     main()


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "py-mcp-youtube-toolbox"
 3 | version = "0.1.0"
 4 | description = "YouTube Toolbox"
 5 | authors = [
 6 |     { name = "Anthony Kim", email = "jikime@gmail.com" },
 7 | ]
 8 | readme = "README.md"
 9 | license = { text = "Apache-2.0" }
10 | requires-python = ">=3.12"
11 | dependencies = [
12 |     "google-api-python-client>=2.169.0",
13 |     "mcp[cli]>=1.7.1",
14 |     "python-dotenv>=1.1.0",
15 |     "youtube-transcript-api>=1.0.3",
16 | ]
17 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | google-api-python-client>=2.169.0
2 | mcp[cli]>=1.7.1
3 | python-dotenv>=1.1.0
4 | youtube-transcript-api>=1.0.3 


--------------------------------------------------------------------------------
/server.py:
--------------------------------------------------------------------------------
   1 | import os
   2 | import json
   3 | import re
   4 | import logging
   5 | from logging.handlers import RotatingFileHandler
   6 | from typing import List, Dict, Any, Optional
   7 | 
   8 | # Environment variable loading (python-dotenv)
   9 | from dotenv import load_dotenv
  10 | 
  11 | # Google API related imports
  12 | from googleapiclient.discovery import build
  13 | from googleapiclient.errors import HttpError
  14 | 
  15 | # YouTube transcript API
  16 | 
  17 | from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
  18 | 
  19 | # MCP related imports
  20 | from mcp.server.fastmcp import FastMCP
  21 | 
  22 | # Load environment variables (load_dotenv also reads a .env file when one is present)
  23 | load_dotenv()
  24 | YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
  25 | 
  26 | # Configure the module logger: INFO level, timestamp/name/level/message format
  27 | logger = logging.getLogger(__name__)
  28 | logger.setLevel(logging.INFO)
  29 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  30 | 
  31 | # Create the log directory ("logs") next to this file
  32 | log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "logs")
  33 | os.makedirs(log_dir, exist_ok=True)
  34 | log_file = os.path.join(log_dir, "youtube_toolbox.log")
  35 | 
  36 | # Add a rotating file handler: 5 MiB per file, up to 5 backups kept
  37 | file_handler = RotatingFileHandler(log_file, maxBytes=5*1024*1024, backupCount=5)
  38 | file_handler.setFormatter(formatter)
  39 | logger.addHandler(file_handler)
  40 | 
  41 | # Add a console handler (StreamHandler() with no stream argument writes to sys.stderr)
  42 | console_handler = logging.StreamHandler()
  43 | console_handler.setFormatter(formatter)
  44 | logger.addHandler(console_handler)
  45 | 
  46 | # Fail at import time when YOUTUBE_API_KEY is missing or empty
  47 | if not YOUTUBE_API_KEY:
  48 |     logger.error("YOUTUBE_API_KEY environment variable is not set")
  49 |     raise ValueError("YOUTUBE_API_KEY environment variable is required")
  50 | 
  51 | # Create the MCP server instance that the decorators below register against
  52 | mcp = FastMCP("YouTube Toolbox MCP Server")
  53 | logger.info("YouTube Toolbox MCP Server 준비 중...")
  54 | 
  55 | # Define prompt. NOTE(review): confirm FastMCP accepts the {'messages': [...]} dict returned below as a prompt result
  56 | @mcp.prompt(
  57 |     name="transcript_summary",
  58 |     description="Generate a summary of a YouTube video based on its transcript content with customizable options. This prompt provides different summary levels from brief overviews to detailed analyses, and can extract key topics from the content. Optimal for quickly understanding video content without watching the entire video."
  59 | )
  60 | async def transcript_summary(
  61 |     video_id: str,
  62 |     language: Optional[str] = None,
  63 |     summary_length: Optional[str] = None,
  64 |     include_keywords: Optional[str] = None
  65 | ) -> Dict[str, Any]:
  66 |     """
  67 |     Build the prompt that asks an LLM to summarize a YouTube video from its transcript
  68 |     
  69 |     Args:
  70 |         video_id (str): The YouTube video ID
  71 |         language (str, optional): Language code for transcript (e.g., "en", "ko")
  72 |         summary_length (str, optional): Level of detail in summary ("short", "medium", or "detailed", default: "medium")
  73 |         include_keywords (str, optional): Whether to extract key topics (only the exact string "true" enables it)
  74 |     
  75 |     Returns:
  76 |         Dict[str, Any]: {'messages': [{'role': 'user', 'content': <prompt text, or an "Error..." text on failure>}]}
  77 |     """
  78 |     try:
  79 |         # Set defaults: length falls back to 'medium'; keywords are enabled only by the string 'true'
  80 |         final_summary_length = summary_length or 'medium'
  81 |         should_include_keywords = include_keywords == 'true'
  82 |         
  83 |         # Get video details; an empty result or one containing an 'error' key yields an error message
  84 |         video_data = youtube_service.get_video_details(video_id)
  85 |         if not video_data or 'error' in video_data:
  86 |             return {
  87 |                 'messages': [{
  88 |                     'role': 'user',
  89 |                     'content': f"Error: Could not retrieve video details for ID {video_id}"
  90 |                 }]
  91 |             }
  92 |             
  93 |         video = video_data['items'][0] if 'items' in video_data and video_data['items'] else None
  94 |         
  95 |         # Get transcript data; any exception here is logged and returned as an error message
  96 |         try:
  97 |             raw_transcript_data = youtube_service.get_video_transcript(video_id, language)
  98 |             
  99 |             # Format transcript text based on the actual structure (dict, list, or other iterable)
 100 |             transcript_text = ""
 101 |             
 102 |             if isinstance(raw_transcript_data, dict):
 103 |                 # Handle dictionary response (might have transcript or text key)
 104 |                 if 'transcript' in raw_transcript_data:
 105 |                     transcript_text = ' '.join([segment.get('text', '') for segment in raw_transcript_data['transcript']])
 106 |                 elif 'text' in raw_transcript_data:
 107 |                     transcript_text = raw_transcript_data['text']
 108 |             elif isinstance(raw_transcript_data, list):
 109 |                 # Handle list response (direct list of segment dictionaries)
 110 |                 transcript_text = ' '.join([item.get('text', '') for item in raw_transcript_data])
 111 |             else:
 112 |                 # Handle FetchedTranscript objects or other iterables by reading each segment's .text attribute
 113 |                 transcript_segments = []
 114 |                 for segment in raw_transcript_data:
 115 |                     text = getattr(segment, 'text', '')
 116 |                     transcript_segments.append(text)
 117 |                 transcript_text = ' '.join(transcript_segments)
 118 |             
 119 |             if not transcript_text:
 120 |                 return {
 121 |                     'messages': [{
 122 |                         'role': 'user',
 123 |                         'content': f"Error: Could not extract transcript text for video ID {video_id}."
 124 |                     }]
 125 |                 }
 126 |                 
 127 |         except Exception as e:
 128 |             logger.exception(f"Error getting transcript for video {video_id}: {e}")
 129 |             return {
 130 |                 'messages': [{
 131 |                     'role': 'user',
 132 |                     'content': f"Error: Could not retrieve transcript for video ID {video_id}. {str(e)}"
 133 |                 }]
 134 |             }
 135 |         
 136 |         # Define summary instructions based on length
 137 |         summary_instructions = ''
 138 |         if final_summary_length == 'short':
 139 |             summary_instructions = "Please provide a brief summary of this video in 3-5 sentences that captures the main idea."
 140 |         elif final_summary_length == 'detailed':
 141 |             summary_instructions = """Please provide a comprehensive summary of this video, including:
 142 | 1. A detailed overview of the main topics (at least 3-4 paragraphs)
 143 | 2. All important details, facts, and arguments presented
 144 | 3. The structure of the content and how ideas are developed
 145 | 4. The overall tone, style, and intended audience of the content
 146 | 5. Any conclusions or calls to action mentioned"""
 147 |         else:  # 'medium' or default
 148 |             summary_instructions = """Please provide:
 149 | 1. A concise summary of the main topics and key points
 150 | 2. Important details or facts presented
 151 | 3. The overall tone and style of the content"""
 152 |         
 153 |         # Add keywords extraction if requested
 154 |         if should_include_keywords:
 155 |             summary_instructions += """\n\nAlso extract and list 5-10 key topics, themes, or keywords from the content in the format:
 156 | KEY TOPICS: [comma-separated list of key topics/keywords]"""
 157 |         
 158 |         # Get video metadata, falling back to 'Unknown' when the video or a field is missing
 159 |         video_title = video.get('snippet', {}).get('title', 'Unknown') if video else 'Unknown'
 160 |         channel_title = video.get('snippet', {}).get('channelTitle', 'Unknown') if video else 'Unknown'
 161 |         published_at = video.get('snippet', {}).get('publishedAt', 'Unknown') if video else 'Unknown'
 162 |         
 163 |         # Construct the prompt message
 164 |         prompt_message = f"""Please provide a {final_summary_length} summary of the following YouTube video transcript.
 165 | 
 166 | Video Title: {video_title}
 167 | Channel: {channel_title}
 168 | Published: {published_at}
 169 | 
 170 | Transcript:
 171 | {transcript_text}
 172 | 
 173 | {summary_instructions}"""
 174 |         
 175 |         return {
 176 |             'messages': [{
 177 |                 'role': 'user',
 178 |                 'content': prompt_message
 179 |             }]
 180 |         }
 181 |     except Exception as e:
 182 |         logger.exception(f"Error in transcript_summary prompt: {e}")
 183 |         return {
 184 |             'messages': [{
 185 |                 'role': 'user',
 186 |                 'content': f"Error creating transcript summary prompt: {str(e)}"
 187 |             }]
 188 |         }
 189 | 
 190 | class YouTubeService:
 191 |     """Service for interacting with YouTube API"""
 192 |     
 193 |     def __init__(self):
 194 |         self.youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
 195 |         
 196 |     def parse_url(self, url: str) -> str:
 197 |         """
 198 |         Parse the URL to get the video ID
 199 |         """
 200 |         video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
 201 |         if not video_id_match:
 202 |             return url
 203 |         video_id = video_id_match.group(1)
 204 |         
 205 |         return video_id
 206 |     
 207 |     def normalize_region_code(self, region_code: str) -> str:
 208 |         """
 209 |         Convert region codes to valid ISO 3166-1 alpha-2 country codes
 210 |         """
 211 |         if not region_code:
 212 |             return None
 213 |             
 214 |         # Common mappings for non-standard codes to standard ISO codes
 215 |         region_mapping = {
 216 |             'KO': 'KR',  # Korea
 217 |             'EN': 'US',  # English -> US as fallback
 218 |             'JP': 'JP',  # Japan
 219 |             'CN': 'CN',  # China
 220 |         }
 221 |         
 222 |         # Convert to uppercase
 223 |         region_code = region_code.upper()
 224 |         
 225 |         # Return mapped code or original if no mapping exists
 226 |         return region_mapping.get(region_code, region_code)
 227 |     
 228 |     def search_videos(self, query: str, max_results: int = 10, **options) -> Dict[str, Any]:
 229 |         """
 230 |         Search for YouTube videos based on query and options
 231 |         """
 232 |         try:
 233 |             search_params = {
 234 |                 'part': 'snippet',
 235 |                 'q': query,
 236 |                 'maxResults': max_results,
 237 |                 'type': options.get('type', 'video')
 238 |             }
 239 |             
 240 |             # Add optional parameters if provided
 241 |             for param in ['channelId', 'order', 'videoDuration', 'publishedAfter', 
 242 |                         'publishedBefore', 'videoCaption', 'videoDefinition', 'regionCode']:
 243 |                 if param in options and options[param]:
 244 |                     search_params[param] = options[param]
 245 |             
 246 |             response = self.youtube.search().list(**search_params).execute()
 247 |             return response
 248 |         except HttpError as e:
 249 |             logger.error(f"Error searching videos: {e}")
 250 |             raise e
 251 |     
 252 |     def get_video_details(self, video_id: str) -> Dict[str, Any]:
 253 |         """
 254 |         Get detailed information about a specific YouTube video
 255 |         """
 256 |         video_id = self.parse_url(video_id)
 257 |         
 258 |         try:
 259 |             response = self.youtube.videos().list(
 260 |                 part='snippet,contentDetails,statistics',
 261 |                 id=video_id
 262 |             ).execute()
 263 |             return response
 264 |         except HttpError as e:
 265 |             logger.error(f"Error getting video details: {e}")
 266 |             raise e
 267 |     
 268 |     def get_channel_details(self, channel_id: str) -> Dict[str, Any]:
 269 |         """
 270 |         Get detailed information about a specific YouTube channel
 271 |         """
 272 |         channel_id = self.parse_url(channel_id)
 273 |         
 274 |         try:
 275 |             response = self.youtube.channels().list(
 276 |                 part='snippet,statistics',
 277 |                 id=channel_id
 278 |             ).execute()
 279 |             return response
 280 |         except HttpError as e:
 281 |             logger.error(f"Error getting channel details: {e}")
 282 |             raise e
 283 |     
 284 |     def get_video_comments(self, video_id: str, max_results: int = 20, **options) -> Dict[str, Any]:
 285 |         """
 286 |         Get comments for a specific YouTube video
 287 |         """
 288 |         video_id = self.parse_url(video_id)
 289 |         
 290 |         try:
 291 |             params = {
 292 |                 'part': 'snippet',
 293 |                 'videoId': video_id,
 294 |                 'maxResults': max_results
 295 |             }
 296 |             
 297 |             if 'order' in options:
 298 |                 params['order'] = options['order']
 299 |                 
 300 |             if 'pageToken' in options:
 301 |                 params['pageToken'] = options['pageToken']
 302 |                 
 303 |             if options.get('includeReplies'):
 304 |                 params['part'] = 'snippet,replies'
 305 |                 
 306 |             response = self.youtube.commentThreads().list(**params).execute()
 307 |             return response
 308 |         except HttpError as e:
 309 |             logger.error(f"Error getting comments: {e}")
 310 |             raise e
 311 |     
 312 |     def get_video_transcript(self, video_id: str, language: Optional[str] = 'ko') -> List[Dict[str, Any]]:
 313 |         """
 314 |         Get transcript for a specific YouTube video
 315 |         """
 316 |         video_id = self.parse_url(video_id)
 317 |         
 318 |         try:
 319 |             if language:
 320 |                 transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
 321 |                 try:
 322 |                     transcript = transcript_list.find_transcript([language])
 323 |                     return transcript.fetch()
 324 |                 except NoTranscriptFound:
 325 |                     # Fallback to generated transcript if available
 326 |                     try:
 327 |                         transcript = transcript_list.find_generated_transcript([language])
 328 |                         return transcript.fetch()
 329 |                     except:
 330 |                         # Final fallback to any available transcript
 331 |                         transcript = transcript_list.find_transcript(['en'])
 332 |                         return transcript.fetch()
 333 |             else:
 334 |                 return YouTubeTranscriptApi.get_video_transcript(video_id)
 335 |                 
 336 |         except (TranscriptsDisabled, NoTranscriptFound) as e:
 337 |             logger.error(f"No transcript available for video {video_id}: {e}")
 338 |             return []
 339 |         except Exception as e:
 340 |             logger.error(f"Error getting transcript for video {video_id}: {e}")
 341 |             raise e
 342 | 
 343 |     def get_related_videos(self, video_id: str, max_results: Optional[int] = 10) -> Dict[str, Any]:
 344 |         """
 345 |         Get related videos for a specific YouTube video
 346 |         """
 347 |         video_id = self.parse_url(video_id)
 348 |         
 349 |         try:
 350 |             # Use search to find videos for a similar query to effectively get related content
 351 |             # First, get video details to use title for search
 352 |             video_details = self.get_video_details(video_id)
 353 |             if not video_details.get('items'):
 354 |                 raise ValueError(f"Video with ID {video_id} not found")
 355 |             
 356 |             video_title = video_details['items'][0]['snippet']['title']
 357 |             # Extract a few keywords from the title for search
 358 |             search_query = ' '.join(video_title.split()[:3]) if video_title else ''
 359 |             
 360 |             # Search for videos with similar content
 361 |             response = self.youtube.search().list(
 362 |                 part='snippet',
 363 |                 q=search_query,
 364 |                 type='video',
 365 |                 maxResults=max_results,
 366 |                 videoCategoryId=video_details['items'][0]['snippet'].get('categoryId', ''),
 367 |                 relevanceLanguage='en'  # Can be adjusted based on requirements
 368 |             ).execute()
 369 |             
 370 |             # Filter out the original video from results
 371 |             if 'items' in response:
 372 |                 response['items'] = [item for item in response['items'] 
 373 |                                     if item.get('id', {}).get('videoId') != video_id]
 374 |                 # Adjust result count if original video was filtered
 375 |                 if len(response['items']) < max_results:
 376 |                     response['pageInfo']['totalResults'] = len(response['items'])
 377 |                     response['pageInfo']['resultsPerPage'] = len(response['items'])
 378 |             
 379 |             # Add the search query to the response for reference
 380 |             response['searchQuery'] = search_query
 381 |             
 382 |             return response
 383 |         except HttpError as e:
 384 |             logger.error(f"Error getting related videos: {e}")
 385 |             raise e
 386 |           
 387 |             
 388 |     def get_trending_videos(self, region_code: Optional[str] = 'ko', max_results: Optional[int] = 5) -> Dict[str, Any]:
 389 |         """
 390 |         Get trending videos for a specific region
 391 |         """
 392 |         try:
 393 |             params = {
 394 |                 'part': 'snippet,contentDetails,statistics',
 395 |                 'chart': 'mostPopular',
 396 |                 'maxResults': max_results
 397 |             }
 398 |             
 399 |             if region_code:
 400 |                 # Normalize region code to ensure valid ISO country code format
 401 |                 normalized_code = self.normalize_region_code(region_code)
 402 |                 params['regionCode'] = normalized_code
 403 |                 
 404 |             response = self.youtube.videos().list(**params).execute()
 405 |             return response
 406 |         except HttpError as e:
 407 |             logger.error(f"Error getting trending videos: {e}")
 408 |             raise e
 409 |             
 410 |     def format_time(self, milliseconds: int) -> str:
 411 |         """
 412 |         Format milliseconds into a human-readable time string
 413 |         """
 414 |         seconds = int(milliseconds / 1000)
 415 |         minutes = int(seconds / 60)
 416 |         hours = int(minutes / 60)
 417 |         
 418 |         remaining_seconds = seconds % 60
 419 |         remaining_minutes = minutes % 60
 420 |         
 421 |         if hours > 0:
 422 |             return f"{hours:02d}:{remaining_minutes:02d}:{remaining_seconds:02d}"
 423 |         else:
 424 |             return f"{remaining_minutes:02d}:{remaining_seconds:02d}"
 425 | 
 426 |     def get_video_enhanced_transcript(self, video_ids: List[str], options: Dict[str, Any]) -> Dict[str, Any]:
 427 |         """
 428 |         Get enhanced transcript for one or more YouTube videos with advanced filtering and processing
 429 |         
 430 |         Args:
 431 |             video_ids (List[str]): List of YouTube video IDs
 432 |             options (Dict[str, Any]): Advanced options for transcript processing
 433 |                 - language (str, optional): Language code
 434 |                 - format (str, optional): Output format (raw, timestamped, merged)
 435 |                 - includeMetadata (bool, optional): Whether to include video details
 436 |                 - timeRange (Dict, optional): Time range filter with start and end in seconds
 437 |                 - search (Dict, optional): Search filter with query, caseSensitive, and contextLines
 438 |                 - segment (Dict, optional): Segmentation options with method and count
 439 |                 
 440 |         Returns:
 441 |             Dict[str, Any]: Enhanced transcript data
 442 |         """
 443 |         result = {
 444 |             "videos": [],
 445 |             "status": {
 446 |                 "success": True,
 447 |                 "message": "Transcripts processed successfully",
 448 |                 "failedCount": 0,
 449 |                 "successCount": 0
 450 |             }
 451 |         }
 452 |         
 453 |         # Process options
 454 |         language = options.get('language')
 455 |         format_type = options.get('format', 'timestamped')
 456 |         include_metadata = options.get('includeMetadata', False)
 457 |         time_range = options.get('timeRange')
 458 |         search_filter = options.get('search')
 459 |         segment_options = options.get('segment')
 460 |         
 461 |         # Process each video
 462 |         for video_id in video_ids:
 463 |             video_result = {"videoId": video_id}
 464 |             
 465 |             try:
 466 |                 # Get video details if metadata requested
 467 |                 if include_metadata:
 468 |                     video_data = self.get_video_details(video_id)
 469 |                     if not video_data.get('items'):
 470 |                         video_result["error"] = f"Video with ID {video_id} not found"
 471 |                         result["videos"].append(video_result)
 472 |                         result["status"]["failedCount"] += 1
 473 |                         continue
 474 |                         
 475 |                     video = video_data['items'][0]
 476 |                     video_result["metadata"] = {
 477 |                         'id': video.get('id'),
 478 |                         'title': video.get('snippet', {}).get('title'),
 479 |                         'channelTitle': video.get('snippet', {}).get('channelTitle'),
 480 |                         'publishedAt': video.get('snippet', {}).get('publishedAt'),
 481 |                         'duration': video.get('contentDetails', {}).get('duration')
 482 |                     }
 483 |                 
 484 |                 # Call the get_video_transcript method which returns transcript data
 485 |                 raw_transcript_data = self.get_video_transcript(video_id, language)
 486 |                 
 487 |                 # Check if transcript was fetched successfully
 488 |                 if not raw_transcript_data or (isinstance(raw_transcript_data, dict) and 'error' in raw_transcript_data):
 489 |                     error_msg = raw_transcript_data.get('error', "Failed to retrieve transcript") if isinstance(raw_transcript_data, dict) else "Failed to retrieve transcript"
 490 |                     video_result["error"] = error_msg
 491 |                     result["videos"].append(video_result)
 492 |                     result["status"]["failedCount"] += 1
 493 |                     continue
 494 |                 
 495 |                 # Get transcript segments - adapt to different response formats
 496 |                 if isinstance(raw_transcript_data, dict) and 'transcript' in raw_transcript_data:
 497 |                     # If it's a dictionary with transcript key (from existing get_video_transcript method)
 498 |                     segments = raw_transcript_data['transcript']
 499 |                 elif isinstance(raw_transcript_data, dict) and 'text' in raw_transcript_data:
 500 |                     # If the get_video_transcript method returned a formatted response with 'text'
 501 |                     # This is a fallback case
 502 |                     segments = []
 503 |                     video_result["error"] = "Transcript format not supported"
 504 |                     result["videos"].append(video_result)
 505 |                     result["status"]["failedCount"] += 1
 506 |                     continue
 507 |                 elif isinstance(raw_transcript_data, list):
 508 |                     # If it returned a list directly (might happen in some cases)
 509 |                     segments = []
 510 |                     for item in raw_transcript_data:
 511 |                         segments.append({
 512 |                             'text': item.get('text', ''),
 513 |                             'start': item.get('start', 0),
 514 |                             'duration': item.get('duration', 0),
 515 |                             'timestamp': self.format_time(int(item.get('start', 0) * 1000))
 516 |                         })
 517 |                 else:
 518 |                     # This handles the FetchedTranscript objects from YouTubeTranscriptApi
 519 |                     # that don't have a .get() method
 520 |                     segments = []
 521 |                     for segment in raw_transcript_data:
 522 |                         text = getattr(segment, 'text', '')
 523 |                         start = getattr(segment, 'start', 0)
 524 |                         duration = getattr(segment, 'duration', 0)
 525 |                         
 526 |                         segments.append({
 527 |                             'text': text,
 528 |                             'start': start,
 529 |                             'duration': duration,
 530 |                             'timestamp': self.format_time(int(start * 1000))
 531 |                         })
 532 |                 
 533 |                 # Apply time range filter if specified
 534 |                 if time_range:
 535 |                     start_time = time_range.get('start')
 536 |                     end_time = time_range.get('end')
 537 |                     
 538 |                     if start_time is not None:
 539 |                         segments = [s for s in segments if (s['start'] + s['duration']) >= start_time]
 540 |                     
 541 |                     if end_time is not None:
 542 |                         segments = [s for s in segments if s['start'] <= end_time]
 543 |                 
 544 |                 # Apply search filter if specified
 545 |                 if search_filter and segments:
 546 |                     query = search_filter.get('query', '')
 547 |                     case_sensitive = search_filter.get('caseSensitive', False)
 548 |                     context_lines = search_filter.get('contextLines', 0)
 549 |                     
 550 |                     if query:
 551 |                         # Search in segments
 552 |                         matched_indices = []
 553 |                         search_query = query if case_sensitive else query.lower()
 554 |                         
 555 |                         for i, segment in enumerate(segments):
 556 |                             text = segment['text'] if case_sensitive else segment['text'].lower()
 557 |                             if search_query in text:
 558 |                                 matched_indices.append(i)
 559 |                         
 560 |                         # Include context lines
 561 |                         if context_lines > 0:
 562 |                             expanded_indices = set()
 563 |                             for idx in matched_indices:
 564 |                                 # Add the context lines before and after
 565 |                                 for i in range(max(0, idx - context_lines), min(len(segments), idx + context_lines + 1)):
 566 |                                     expanded_indices.add(i)
 567 |                             
 568 |                             matched_indices = sorted(expanded_indices)
 569 |                         
 570 |                         # Filter segments by matched indices
 571 |                         segments = [segments[i] for i in matched_indices]
 572 |                 
 573 |                 # Apply segmentation if specified
 574 |                 if segment_options and segments:
 575 |                     method = segment_options.get('method', 'equal')
 576 |                     count = segment_options.get('count', 1)
 577 |                     
 578 |                     if method == 'equal' and count > 1:
 579 |                         # Divide into equal parts
 580 |                         segment_size = len(segments) // count
 581 |                         segmented_transcript = []
 582 |                         
 583 |                         for i in range(count):
 584 |                             start_idx = i * segment_size
 585 |                             end_idx = start_idx + segment_size if i < count - 1 else len(segments)
 586 |                             
 587 |                             segment_chunks = segments[start_idx:end_idx]
 588 |                             if segment_chunks:  # Only add non-empty segments
 589 |                                 segmented_transcript.append({
 590 |                                     "index": i,
 591 |                                     "segments": segment_chunks,
 592 |                                     "text": " ".join([s['text'] for s in segment_chunks])
 593 |                                 })
 594 |                         
 595 |                         video_result["segments"] = segmented_transcript
 596 |                     elif method == 'smart' and count > 1:
 597 |                         # Use a smarter segmentation approach
 598 |                         # For simplicity, we'll use a basic approach dividing by total character count
 599 |                         total_text = " ".join([s['text'] for s in segments])
 600 |                         total_chars = len(total_text)
 601 |                         chars_per_segment = total_chars // count
 602 |                         
 603 |                         segmented_transcript = []
 604 |                         current_segment = []
 605 |                         current_chars = 0
 606 |                         segment_idx = 0
 607 |                         
 608 |                         for s in segments:
 609 |                             current_segment.append(s)
 610 |                             current_chars += len(s['text'])
 611 |                             
 612 |                             if current_chars >= chars_per_segment and segment_idx < count - 1:
 613 |                                 segmented_transcript.append({
 614 |                                     "index": segment_idx,
 615 |                                     "segments": current_segment,
 616 |                                     "text": " ".join([seg['text'] for seg in current_segment])
 617 |                                 })
 618 |                                 segment_idx += 1
 619 |                                 current_segment = []
 620 |                                 current_chars = 0
 621 |                         
 622 |                         # Add the last segment if not empty
 623 |                         if current_segment:
 624 |                             segmented_transcript.append({
 625 |                                 "index": segment_idx,
 626 |                                 "segments": current_segment,
 627 |                                 "text": " ".join([seg['text'] for seg in current_segment])
 628 |                             })
 629 |                         
 630 |                         video_result["segments"] = segmented_transcript
 631 |                 
 632 |                 # Format transcript based on format type
 633 |                 if format_type == 'raw':
 634 |                     video_result["transcript"] = segments
 635 |                 elif format_type == 'timestamped':
 636 |                     video_result["transcript"] = [
 637 |                         f"[{s['timestamp']}] {s['text']}" for s in segments
 638 |                     ]
 639 |                 elif format_type == 'merged':
 640 |                     video_result["transcript"] = " ".join([s['text'] for s in segments])
 641 |                 
 642 |                 # Store statistics
 643 |                 video_result["statistics"] = {
 644 |                     "segmentCount": len(segments),
 645 |                     "totalDuration": sum([s['duration'] for s in segments]),
 646 |                     "averageSegmentLength": sum([len(s['text']) for s in segments]) / len(segments) if segments else 0
 647 |                 }
 648 |                 
 649 |                 result["videos"].append(video_result)
 650 |                 result["status"]["successCount"] += 1
 651 |                 
 652 |             except Exception as e:
 653 |                 logger.exception(f"Error processing transcript for video {video_id}: {e}")
 654 |                 video_result["error"] = str(e)
 655 |                 result["videos"].append(video_result)
 656 |                 result["status"]["failedCount"] += 1
 657 |         
 658 |         # Update overall status
 659 |         if result["status"]["failedCount"] > 0:
 660 |             if result["status"]["successCount"] == 0:
 661 |                 result["status"]["success"] = False
 662 |                 result["status"]["message"] = "All transcript requests failed"
 663 |             else:
 664 |                 result["status"]["message"] = f"Partially successful ({result['status']['failedCount']} failed, {result['status']['successCount']} succeeded)"
 665 |         
 666 |         return result
 667 | 
# Initialize the module-level YouTube service instance used by the resource
# handlers below; constructing it builds the API client (YouTubeService.__init__).
youtube_service = YouTubeService()
 670 | 
 671 | # Define resource
 672 | @mcp.resource(
 673 |     uri='youtube://available-youtube-tools', 
 674 |     name="available-youtube-tools", 
 675 |     description="Returns a list of YouTube tools available on this MCP server."
 676 | )
 677 | async def get_available_youtube_tools() -> List[Dict[str, str]]:
 678 |     """Returns a list of YouTube tools available on this MCP server."""
 679 |     available_tools = [
 680 |         {"name": "search_videos", "description": "Search for YouTube videos with advanced filtering options"},
 681 |         {"name": "get_video_details", "description": "Get detailed information about a YouTube video"},
 682 |         {"name": "get_channel_details", "description": "Get detailed information about a YouTube channel"},
 683 |         {"name": "get_video_comments", "description": "Get comments for a YouTube video"},
 684 |         {"name": "get_video_transcript", "description": "Get transcript/captions for a YouTube video"},
 685 |         {"name": "get_related_videos", "description": "Get videos related to a specific YouTube video"},
 686 |         {"name": "get_trending_videos", "description": "Get trending videos on YouTube by region"},
 687 |         {"name": "get_video_enhanced_transcript", "description": "Advanced transcript extraction tool with filtering, search, and multi-video capabilities. Provides rich transcript data for detailed analysis and processing. Features: 1) Extract transcripts from multiple videos; 2) Filter by time ranges; 3) Search within transcripts; 4) Segment transcripts; 5) Format output in different ways; 6) Include video metadata."}
 688 |     ]
 689 |     
 690 |     logger.info(f"Resource 'get_available_youtube_tools' called. Returning {len(available_tools)} tools.")
 691 |     return available_tools
 692 | 
 693 | @mcp.resource(
 694 |     uri='youtube://video/{video_id}',
 695 |     name="video",
 696 |     description="Get detailed information about a specific YouTube video by ID"
 697 | )
 698 | async def get_video_resource(video_id: str) -> str:
 699 |     """
 700 |     Resource for getting detailed information about a specific YouTube video
 701 |     
 702 |     Args:
 703 |         video_id (str): YouTube video ID
 704 |     
 705 |     Returns:
 706 |         Dict[str, Any]: Video details resource
 707 |     """
 708 | 
 709 |     try:
 710 |         video_data = youtube_service.get_video_details(video_id)
 711 |         
 712 |         if not video_data.get('items'):
 713 |             return {
 714 |                 "contents": [{
 715 |                     "uri": f"youtube://video/{video_id}",
 716 |                     "text": f"Video with ID {video_id} not found."
 717 |                 }]
 718 |             }
 719 |             
 720 |         video = video_data['items'][0]
 721 |         
 722 |         # Format the response
 723 |         details = {
 724 |             'id': video.get('id'),
 725 |             'title': video.get('snippet', {}).get('title'),
 726 |             'description': video.get('snippet', {}).get('description'),
 727 |             'publishedAt': video.get('snippet', {}).get('publishedAt'),
 728 |             'channelId': video.get('snippet', {}).get('channelId'),
 729 |             'channelTitle': video.get('snippet', {}).get('channelTitle'),
 730 |             'viewCount': video.get('statistics', {}).get('viewCount'),
 731 |             'likeCount': video.get('statistics', {}).get('likeCount'),
 732 |             'commentCount': video.get('statistics', {}).get('commentCount'),
 733 |             'duration': video.get('contentDetails', {}).get('duration')
 734 |         }
 735 |         
 736 |         return {
 737 |             "contents": [{
 738 |                 "uri": f"youtube://video/{video_id}",
 739 |                 "text": json.dumps(details, indent=2)
 740 |             }]
 741 |         }
 742 |     except Exception as e:
 743 |         logger.exception(f"Error in get_video_details: {e}")
 744 |         return {
 745 |             "contents": [{
 746 |                 "uri": f"youtube://video/{video_id}",
 747 |                 "text": f"Error fetching video details: {str(e)}"
 748 |             }]
 749 |         }
 750 | 
 751 | @mcp.resource(
 752 |     uri='youtube://channel/{channel_id}',
 753 |     name="channel",
 754 |     description="Get information about a specific YouTube channel by ID"
 755 | )
 756 | async def get_channel_resource(channel_id: str) -> Dict[str, Any]:
 757 |     """
 758 |     Resource for getting information about a specific YouTube channel
 759 |     
 760 |     Args:
 761 |         channel_id (str): YouTube channel ID
 762 |     
 763 |     Returns:
 764 |         Dict[str, Any]: Channel details resource
 765 |     """
 766 |     try:
 767 |         channel_data = youtube_service.get_channel_details(channel_id)
 768 |         
 769 |         if not channel_data.get('items'):
 770 |             return {
 771 |                 "contents": [{
 772 |                     "uri": f"youtube://channel/{channel_id}",
 773 |                     "text": f"Channel with ID {channel_id} not found."
 774 |                 }]
 775 |             }
 776 |             
 777 |         channel = channel_data['items'][0]
 778 |         
 779 |         # Format the response
 780 |         details = {
 781 |             'id': channel.get('id'),
 782 |             'title': channel.get('snippet', {}).get('title'),
 783 |             'description': channel.get('snippet', {}).get('description'),
 784 |             'publishedAt': channel.get('snippet', {}).get('publishedAt'),
 785 |             'subscriberCount': channel.get('statistics', {}).get('subscriberCount'),
 786 |             'videoCount': channel.get('statistics', {}).get('videoCount'),
 787 |             'viewCount': channel.get('statistics', {}).get('viewCount')
 788 |         }
 789 |         
 790 |         return {
 791 |             "contents": [{
 792 |                 "uri": f"youtube://channel/{channel_id}",
 793 |                 "text": json.dumps(details, indent=2)
 794 |             }]
 795 |         }
 796 |     except Exception as e:
 797 |         logger.exception(f"Error in get_channel_resource: {e}")
 798 |         return {
 799 |             "contents": [{
 800 |                 "uri": f"youtube://channel/{channel_id}",
 801 |                 "text": f"Error fetching channel details: {str(e)}"
 802 |             }]
 803 |         }
 804 | 
 805 | @mcp.resource(
 806 |     uri='youtube://transcript/{video_id}?language={language}',
 807 |     name="transcript",
 808 |     description="Get the transcript/captions for a specific YouTube video",
 809 | )
 810 | async def get_video_transcript_resource(video_id: str, language: Optional[str] = None) -> Dict[str, Any]:
 811 |     """
 812 |     Resource for getting transcript/captions for a specific YouTube video
 813 |     
 814 |     Args:
 815 |         video_id (str): YouTube video ID
 816 |         language (str, optional): Language code for transcript
 817 |     
 818 |     Returns:
 819 |         Dict[str, Any]: Transcript resource
 820 |     """
 821 |     try:
 822 |         # Get video details for metadata
 823 |         video_data = youtube_service.get_video_details(video_id)
 824 |         
 825 |         if not video_data.get('items'):
 826 |             return {
 827 |                 "contents": [{
 828 |                     "uri": f"youtube://transcript/{video_id}",
 829 |                     "text": f"Video with ID {video_id} not found."
 830 |                 }]
 831 |             }
 832 |             
 833 |         video = video_data['items'][0]
 834 |         
 835 |         try:
 836 |             # Get transcript
 837 |             transcript_data = youtube_service.get_video_transcript(video_id, language)
 838 |             
 839 |             # Format transcript with timestamps
 840 |             formatted_transcript = []
 841 |             for segment in transcript_data:
 842 |                 # FetchedTranscriptSnippet 객체에서 속성으로 접근
 843 |                 text = getattr(segment, 'text', '')
 844 |                 start = getattr(segment, 'start', 0)
 845 |                 duration = getattr(segment, 'duration', 0)
 846 |                 
 847 |                 formatted_transcript.append({
 848 |                     'text': text,
 849 |                     'start': start,
 850 |                     'duration': duration,
 851 |                     'timestamp': youtube_service.format_time(int(start * 1000))
 852 |                 })
 853 |             
 854 |             # Create metadata
 855 |             metadata = {
 856 |                 'videoId': video.get('id'),
 857 |                 'title': video.get('snippet', {}).get('title'),
 858 |                 'channelTitle': video.get('snippet', {}).get('channelTitle'),
 859 |                 'language': language or 'default',
 860 |                 'segmentCount': len(transcript_data)
 861 |             }
 862 |             
 863 |             # Create timestamped text version
 864 |             timestamped_text = "\n".join([
 865 |                 f"[{item['timestamp']}] {item['text']}" 
 866 |                 for item in formatted_transcript
 867 |             ])
 868 |             
 869 |             return {
 870 |                 "contents": [{
 871 |                     "uri": f"youtube://transcript/{video_id}",
 872 |                     "text": f"# Transcript for: {metadata['title']}\n\n{timestamped_text}"
 873 |                 }],
 874 |                 "metadata": metadata
 875 |             }
 876 |         except Exception as e:
 877 |             return {
 878 |                 "contents": [{
 879 |                     "uri": f"youtube://transcript/{video_id}",
 880 |                     "text": f"Transcript not available for video ID {video_id}. Error: {str(e)}"
 881 |                 }]
 882 |             }
 883 |     except Exception as e:
 884 |         logger.exception(f"Error in get_video_transcript_resource: {e}")
 885 |         return {
 886 |             "contents": [{
 887 |                 "uri": f"youtube://transcript/{video_id}",
 888 |                 "text": f"Error fetching transcript: {str(e)}"
 889 |             }]
 890 |         }
 891 | 
 892 | # Define tools
 893 | @mcp.tool(
 894 |     name="search_videos",
 895 |     description="Search for YouTube videos with advanced filtering options",
 896 | )
 897 | async def search_videos(
 898 |     query: str, 
 899 |     max_results: Optional[int] = 10, 
 900 |     channel_id: Optional[str] = None,
 901 |     order: Optional[str] = None,
 902 |     video_duration: Optional[str] = None,
 903 |     published_after: Optional[str] = None,
 904 |     published_before: Optional[str] = None,
 905 |     video_caption: Optional[str] = None,
 906 |     video_definition: Optional[str] = None,
 907 |     region_code: Optional[str] = None
 908 | ) -> Dict[str, Any]:
 909 |     """
 910 |     Search for YouTube videos with advanced filtering options
 911 |     
 912 |     Args:
 913 |         query (str): Search term
 914 |         max_results (int): Number of results to return (1-50)
 915 |         channel_id (str, optional): Filter by specific channel
 916 |         order (str, optional): Sort by date, rating, viewCount, relevance, title
 917 |         video_duration (str, optional): Filter by length (short: <4min, medium: 4-20min, long: >20min)
 918 |         published_after (str, optional): Filter by publish date after (ISO format)
 919 |         published_before (str, optional): Filter by publish date before (ISO format)
 920 |         video_caption (str, optional): Filter by caption availability
 921 |         video_definition (str, optional): Filter by quality (standard/high)
 922 |         region_code (str, optional): Filter by country (ISO country code)
 923 |     
 924 |     Returns:
 925 |         Dict[str, Any]: Search results
 926 |     """
 927 |     try:
 928 |         options = {
 929 |             'channelId': channel_id,
 930 |             'order': order,
 931 |             'videoDuration': video_duration,
 932 |             'publishedAfter': published_after,
 933 |             'publishedBefore': published_before,
 934 |             'videoCaption': video_caption,
 935 |             'videoDefinition': video_definition,
 936 |             'regionCode': region_code
 937 |         }
 938 |         
 939 |         search_results = youtube_service.search_videos(query, max_results, **options)
 940 |         
 941 |         # Format the response
 942 |         formatted_results = []
 943 |         for item in search_results.get('items', []):
 944 |             video_id = item.get('id', {}).get('videoId')
 945 |             
 946 |             formatted_results.append({
 947 |                 'videoId': video_id,
 948 |                 'title': item.get('snippet', {}).get('title'),
 949 |                 'channelId': item.get('snippet', {}).get('channelId'),
 950 |                 'channelTitle': item.get('snippet', {}).get('channelTitle'),
 951 |                 'publishedAt': item.get('snippet', {}).get('publishedAt'),
 952 |                 'description': item.get('snippet', {}).get('description'),
 953 |                 'thumbnails': item.get('snippet', {}).get('thumbnails'),
 954 |                 'url': f"https://www.youtube.com/watch?v={video_id}"
 955 |             })
 956 |             
 957 |         return {
 958 |             'items': formatted_results,
 959 |             'totalResults': search_results.get('pageInfo', {}).get('totalResults', 0),
 960 |             'nextPageToken': search_results.get('nextPageToken')
 961 |         }
 962 |     except Exception as e:
 963 |         logger.exception(f"Error in search_videos: {e}")
 964 |         return {'error': str(e)}
 965 | 
 966 | @mcp.tool(
 967 |     name="get_video_details",
 968 |     description="Get detailed information about a YouTube video",
 969 | )
 970 | async def get_video_details(video_id: str) -> Dict[str, Any]:
 971 |     """
 972 |     Get detailed information about a YouTube video
 973 |     
 974 |     Args:
 975 |         video_id (str): YouTube video ID
 976 |     
 977 |     Returns:
 978 |         Dict[str, Any]: Video details
 979 |     """
 980 |     try:
 981 |         video_data = youtube_service.get_video_details(video_id)
 982 |         
 983 |         if not video_data.get('items'):
 984 |             return {'error': f"Video with ID {video_id} not found"}
 985 |             
 986 |         video = video_data['items'][0]
 987 |         
 988 |         # Format the response
 989 |         details = {
 990 |             'id': video.get('id'),
 991 |             'title': video.get('snippet', {}).get('title'),
 992 |             'description': video.get('snippet', {}).get('description'),
 993 |             'publishedAt': video.get('snippet', {}).get('publishedAt'),
 994 |             'channelId': video.get('snippet', {}).get('channelId'),
 995 |             'channelTitle': video.get('snippet', {}).get('channelTitle'),
 996 |             'tags': video.get('snippet', {}).get('tags', []),
 997 |             'viewCount': video.get('statistics', {}).get('viewCount'),
 998 |             'likeCount': video.get('statistics', {}).get('likeCount'),
 999 |             'commentCount': video.get('statistics', {}).get('commentCount'),
1000 |             'duration': video.get('contentDetails', {}).get('duration'),
1001 |             'dimension': video.get('contentDetails', {}).get('dimension'),
1002 |             'definition': video.get('contentDetails', {}).get('definition'),
1003 |             'thumbnails': video.get('snippet', {}).get('thumbnails'),
1004 |             'url': f"https://www.youtube.com/watch?v={video_id}"
1005 |         }
1006 |         
1007 |         return details
1008 |     except Exception as e:
1009 |         logger.exception(f"Error in get_video_details: {e}")
1010 |         return {'error': str(e)}
1011 | 
@mcp.tool(
    name="get_channel_details",
    description="Get detailed information about a YouTube channel",
)
async def get_channel_details(channel_id: str) -> Dict[str, Any]:
    """
    Look up one channel and return a flattened summary of it.

    Args:
        channel_id (str): YouTube channel ID

    Returns:
        Dict[str, Any]: Selected snippet and statistics fields plus a
        channel URL built from ``channel_id``; ``{'error': ...}`` when the
        lookup returns no items or an exception is raised.
    """
    try:
        response = youtube_service.get_channel_details(channel_id)
        matches = response.get('items')

        if not matches:
            return {'error': f"Channel with ID {channel_id} not found"}

        # Only the first returned item is used.
        record = matches[0]
        snippet = record.get('snippet', {})
        stats = record.get('statistics', {})

        return {
            'id': record.get('id'),
            'title': snippet.get('title'),
            'description': snippet.get('description'),
            'publishedAt': snippet.get('publishedAt'),
            'customUrl': snippet.get('customUrl'),
            'thumbnails': snippet.get('thumbnails'),
            'subscriberCount': stats.get('subscriberCount'),
            'videoCount': stats.get('videoCount'),
            'viewCount': stats.get('viewCount'),
            'url': f"https://www.youtube.com/channel/{channel_id}"
        }
    except Exception as e:
        logger.exception(f"Error in get_channel_details: {e}")
        return {'error': str(e)}
1052 | 
@mcp.tool(
    name="get_video_comments",
    description="Get comments for a YouTube video",
)
async def get_video_comments(
    video_id: str, 
    max_results: Optional[int] = 20, 
    order: Optional[str] = "relevance", 
    include_replies: bool = False,
    page_token: Optional[str] = None
) -> Dict[str, Any]:
    """
    Fetch a page of comment threads for a video and flatten them.

    Args:
        video_id (str): YouTube video ID
        max_results (int): Maximum number of comments to return (default: 20)
        order (str): Order by 'relevance' (default) or 'time'
        include_replies (bool): Whether to include replies to comments
        page_token (str, optional): Token for paginated results

    Returns:
        Dict[str, Any]: ``{'comments', 'nextPageToken', 'totalResults'}``
        on success, or ``{'error': message}`` if any exception is raised.
    """
    def _comment_fields(comment_id: Any, snippet: Dict[str, Any]) -> Dict[str, Any]:
        # Top-level comments and replies share these fields.
        return {
            'id': comment_id,
            'text': snippet.get('textDisplay'),
            'author': snippet.get('authorDisplayName'),
            'authorProfileImageUrl': snippet.get('authorProfileImageUrl'),
            'likeCount': snippet.get('likeCount'),
            'publishedAt': snippet.get('publishedAt'),
            'updatedAt': snippet.get('updatedAt')
        }

    try:
        options = {'order': order, 'includeReplies': include_replies}
        # The page token is only forwarded when one was supplied.
        if page_token:
            options['pageToken'] = page_token

        page = youtube_service.get_video_comments(video_id, max_results, **options)

        threads = []
        for thread in page.get('items', []):
            top_snippet = thread.get('snippet', {}).get('topLevelComment', {}).get('snippet', {})

            entry = _comment_fields(thread.get('id'), top_snippet)
            entry['replyCount'] = thread.get('snippet', {}).get('totalReplyCount', 0)

            # Replies are attached only when requested and present on the thread.
            if include_replies and 'replies' in thread:
                entry['replies'] = [
                    _comment_fields(reply.get('id'), reply.get('snippet', {}))
                    for reply in thread.get('replies', {}).get('comments', [])
                ]

            threads.append(entry)

        return {
            'comments': threads,
            'nextPageToken': page.get('nextPageToken'),
            'totalResults': page.get('pageInfo', {}).get('totalResults', 0)
        }
    except Exception as e:
        logger.exception(f"Error in get_video_comments: {e}")
        return {'error': str(e)}
1131 | 
@mcp.tool(
    name="get_video_transcript",
    description="Get transcript/captions for a YouTube video",
)
async def get_video_transcript(video_id: str, language: Optional[str] = 'ko') -> Dict[str, Any]:
    """
    Fetch a video's transcript as structured segments and timestamped text.

    Args:
        video_id (str): YouTube video ID
        language (str, optional): Language code (e.g., 'en', 'ko', 'fr');
            defaults to 'ko'

    Returns:
        Dict[str, Any]: ``{'metadata', 'transcript', 'text', 'channelId'}``
        on success. If the video is not found or any step fails, a dict
        with an ``'error'`` key is returned instead (the transcript-failure
        variant also carries ``'videoId'`` and ``'title'``).
    """
    def _describe(segment: Any) -> Dict[str, Any]:
        # Segment fields are read as attributes, with defaults when missing.
        text = getattr(segment, 'text', '')
        start = getattr(segment, 'start', 0)
        duration = getattr(segment, 'duration', 0)
        return {
            'text': text,
            'start': start,
            'duration': duration,
            # start * 1000 is presumably seconds -> milliseconds; confirm
            # against youtube_service.format_time.
            'timestamp': youtube_service.format_time(int(start * 1000))
        }

    try:
        # The video lookup supplies the title/channel used in the metadata.
        lookup = youtube_service.get_video_details(video_id)

        if not lookup.get('items'):
            return {'error': f"Video with ID {video_id} not found"}

        video = lookup['items'][0]

        try:
            segments = youtube_service.get_video_transcript(video_id, language)

            rows = [_describe(segment) for segment in segments]

            metadata = {
                'videoId': video.get('id'),
                'title': video.get('snippet', {}).get('title'),
                'channelTitle': video.get('snippet', {}).get('channelTitle'),
                'language': language or 'default',
                'segmentCount': len(segments)
            }

            lines = []
            for row in rows:
                lines.append(f"[{row['timestamp']}] {row['text']}")

            return {
                'metadata': metadata,
                'transcript': rows,
                'text': "\n".join(lines),
                'channelId': video.get('snippet', {}).get('channelId')
            }
        except Exception as e:
            # Transcript-specific failures still report which video was asked for.
            return {
                'error': f"Could not retrieve transcript: {str(e)}",
                'videoId': video_id,
                'title': video.get('snippet', {}).get('title')
            }

    except Exception as e:
        logger.exception(f"Error in get_video_transcript: {e}")
        return {'error': str(e)}
1205 | 
@mcp.tool(
    name="get_related_videos",
    description="Get videos related to a specific YouTube video",
)
async def get_related_videos(video_id: str, max_results: Optional[int] = 10) -> Dict[str, Any]:
    """
    Get videos related to a specific YouTube video

    Args:
        video_id (str): YouTube video ID
        max_results (int): Maximum number of related videos to return (default: 10)

    Returns:
        Dict[str, Any]: ``{'videos', 'totalResults', 'originalVideoId',
        'searchQuery'}`` on success, or ``{'error': message}`` if any
        exception is raised. ``totalResults`` is the number of formatted
        videos. Each video's ``'url'`` is ``None`` when the item carries
        no ``id.videoId``.
    """
    try:
        related_data = youtube_service.get_related_videos(video_id, max_results)

        # Format the response
        formatted_videos = []
        for item in related_data.get('items', []):
            related_video_id = item.get('id', {}).get('videoId')
            snippet = item.get('snippet', {})

            formatted_videos.append({
                'videoId': related_video_id,
                'title': snippet.get('title'),
                'channelTitle': snippet.get('channelTitle'),
                'publishedAt': snippet.get('publishedAt'),
                'description': snippet.get('description'),
                'thumbnails': snippet.get('thumbnails'),
                # An item without a videoId must not produce a literal
                # "watch?v=None" link.
                'url': (
                    f"https://www.youtube.com/watch?v={related_video_id}"
                    if related_video_id else None
                )
            })

        return {
            'videos': formatted_videos,
            'totalResults': len(formatted_videos),
            'originalVideoId': video_id,
            'searchQuery': related_data.get('searchQuery', '')
        }
    except Exception as e:
        logger.exception(f"Error in get_related_videos: {e}")
        return {'error': str(e)}
1248 | 
@mcp.tool(
    name="get_trending_videos",
    description="Get trending videos on YouTube by region",
)
async def get_trending_videos(region_code: Optional[str] = None, max_results: int = 5) -> Dict[str, Any]:
    """
    Get trending videos on YouTube by region

    Args:
        region_code (str, optional): ISO country code. Defaults to None,
            in which case the value is passed through unchanged and any
            fallback region is chosen inside
            ``youtube_service.get_trending_videos``.
        max_results (int): Maximum number of videos to return (default: 5)

    Returns:
        Dict[str, Any]: ``{'videos', 'region', 'totalResults'}`` on
        success, or ``{'error': message}`` if any exception is raised.
        ``region`` echoes the ``region_code`` argument as given (so it is
        None when the caller omitted it).
    """
    try:
        # region_code handling is now done inside the YouTubeService class
        trending_data = youtube_service.get_trending_videos(region_code, max_results)

        # Format the response
        formatted_videos = []
        for video in trending_data.get('items', []):
            snippet = video.get('snippet', {})
            statistics = video.get('statistics', {})

            formatted_videos.append({
                'id': video.get('id'),
                'title': snippet.get('title'),
                'description': snippet.get('description'),
                'publishedAt': snippet.get('publishedAt'),
                'channelId': snippet.get('channelId'),
                'channelTitle': snippet.get('channelTitle'),
                'viewCount': statistics.get('viewCount'),
                'likeCount': statistics.get('likeCount'),
                'commentCount': statistics.get('commentCount'),
                'thumbnails': snippet.get('thumbnails'),
                'url': f"https://www.youtube.com/watch?v={video.get('id')}"
            })

        return {
            'videos': formatted_videos,
            'region': region_code,
            'totalResults': len(formatted_videos)
        }
    except Exception as e:
        logger.exception(f"Error in get_trending_videos: {e}")
        return {'error': str(e)}
1293 | 
@mcp.tool(
    name="get_video_enhanced_transcript",
    description="Advanced transcript extraction tool with filtering, search, and multi-video capabilities. Provides rich transcript data for detailed analysis and processing. Features: 1) Extract transcripts from multiple videos; 2) Filter by time ranges; 3) Search within transcripts; 4) Segment transcripts; 5) Format output in different ways; 6) Include video metadata.",
)
async def get_video_enhanced_transcript(
    video_ids: List[str],
    language: Optional[str] = 'ko',
    start_time: Optional[int] = None,
    end_time: Optional[int] = None,
    query: Optional[str] = None,
    case_sensitive: Optional[bool] = False,
    segment_method: Optional[str] = "equal",
    segment_count: Optional[int] = 2,
    format: Optional[str] = "timestamped",
    include_metadata: Optional[bool] = False,
) -> Dict[str, Any]:
    """
    Translate the flat tool parameters into an options dict and delegate to
    ``youtube_service.get_video_enhanced_transcript``.

    Args:
        video_ids (List[str]): List of YouTube video IDs (max 5)
        language (str, optional): Language code for transcript
        start_time (int, optional): Start time in seconds
        end_time (int, optional): End time in seconds
        query (str, optional): Search query
        case_sensitive (bool, optional): Whether to use case-sensitive search
        segment_method (str, optional): Segment method ("equal" or "smart")
        segment_count (int, optional): Number of segments
        format (str, optional): Output format ("raw", "timestamped", "merged")
        include_metadata (bool, optional): Whether to include video details

    Returns:
        Dict[str, Any]: Whatever the service call returns, or
        ``{'error': message}`` when the ID list is empty, has more than
        five entries, or an exception is raised.
    """
    try:
        # Input validation: at least one and at most five IDs.
        if not video_ids:
            return {'error': "No video IDs provided"}
        if len(video_ids) > 5:
            return {'error': "Maximum 5 video IDs allowed"}

        options = {
            'language': language,
            'format': format,
            'includeMetadata': include_metadata
        }

        # A time window is attached as soon as either bound is given.
        has_window = not (start_time is None and end_time is None)
        if has_window:
            options['timeRange'] = {'start': start_time, 'end': end_time}

        # A search filter is attached only for a non-empty query.
        if query:
            options['search'] = {
                'query': query,
                'caseSensitive': case_sensitive,
                'contextLines': 2  # Default context lines
            }

        # The segment settings are always forwarded.
        options['segment'] = {'method': segment_method, 'count': segment_count}

        return youtube_service.get_video_enhanced_transcript(video_ids, options)
    except Exception as e:
        logger.exception(f"Error in get_video_enhanced_transcript: {e}")
        return {'error': str(e)}
1371 | 
1372 | # Server start point
if __name__ == "__main__":
    # Script entry point: start the MCP server and report startup/runtime
    # failures through the logger.
    logger.info("Starting YouTube MCP server...")
    try:
        mcp.run()
    except Exception as e:
        logger.exception(f"Error running MCP server: {e}")
        # Exit with a non-zero status so whatever launched the process
        # (Docker, an MCP client, a shell) can tell the server failed
        # instead of seeing a clean exit.
        raise SystemExit(1) from e
1379 | 


--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
 2 | 
 3 | startCommand:
 4 |   type: stdio
 5 |   configSchema:
 6 |     # JSON Schema defining the configuration options for the MCP.
 7 |     type: object
 8 |     required:
 9 |       - youtubeApiKey
10 |     properties:
11 |       youtubeApiKey:
12 |         type: string
13 |         description: YouTube API Key
14 |   commandFunction:
15 |     # A function that produces the CLI command to start the MCP on stdio.
16 |     |-
17 |     (config) => ({
18 |       command: 'python',
19 |       args: ['server.py'],
20 |       env: {
21 |         YOUTUBE_API_KEY: config.youtubeApiKey,
22 |       }
23 |     })
24 |   exampleConfig:
25 |     youtubeApiKey: your_youtube_api_key_here
26 | 


--------------------------------------------------------------------------------