├── src ├── client │ ├── __init__.py │ ├── base.py │ └── pixverse.py ├── __main__.py ├── utils │ ├── __init__.py │ ├── validation.py │ └── helpers.py ├── __init__.py ├── models │ ├── __init__.py │ ├── common.py │ ├── responses.py │ └── requests.py ├── exceptions.py ├── config.py ├── sse_server.py └── server.py ├── config.yaml ├── config.template.yaml ├── LICENSE ├── pyproject.toml ├── .gitignore ├── README-CN.md ├── README.md └── uv.lock /src/client/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pixverse API client module. 3 | """ 4 | 5 | from .base import BaseClient 6 | from .pixverse import PixverseClient 7 | 8 | __all__ = ["BaseClient", "PixverseClient"] 9 | -------------------------------------------------------------------------------- /src/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Entry point for Pixverse MCP server. 3 | """ 4 | 5 | import asyncio 6 | from .server import cli_main 7 | 8 | 9 | def main(): 10 | """Main entry point for uvx.""" 11 | asyncio.run(cli_main()) 12 | 13 | 14 | if __name__ == "__main__": 15 | main() 16 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for Pixverse MCP. 3 | """ 4 | 5 | from .helpers import format_error_message, generate_trace_id 6 | from .validation import validate_model_constraints, validate_request_params 7 | 8 | __all__ = [ 9 | "validate_model_constraints", 10 | "validate_request_params", 11 | "generate_trace_id", 12 | "format_error_message", 13 | ] 14 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | # Pixverse MCP Configuration File 2 | # This is the MCP program configuration containing server settings and defaults 3 | # Users must configure PIXVERSE_API_KEY environment variable in ~/.cursor/mcp.json 4 | 5 | # API Configuration 6 | base_url: "https://app-api.pixverseai.cn" # MCP program configuration, users don't need to modify 7 | # Note: api_key must be provided via PIXVERSE_API_KEY environment variable 8 | -------------------------------------------------------------------------------- /config.template.yaml: -------------------------------------------------------------------------------- 1 | # Pixverse MCP Configuration Template 2 | # Copy this file to config.yaml and fill in your configuration 3 | 4 | # API Configuration 5 | base_url: "https://app-api.pixverseai.ai" # Pixverse API base URL 6 | api_key: "YOUR_PIXVERSE_API_KEY_HERE" # Replace with your Pixverse API Key 7 | 8 | # Server Configuration (optional) 9 | timeout: 30 # Request timeout in seconds 10 | max_retries: 3 # Maximum retry attempts 11 | 12 | # Usage instructions: 13 | # 1. Copy this file to config.yaml 14 | # 2. Replace YOUR_PIXVERSE_API_KEY_HERE with your real API Key 15 | # 3. Adjust other configuration items as needed 16 | # 4. Run: uvx pixverse-mcp --config config.yaml 17 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pixverse MCP - Model Context Protocol server for Pixverse video generation APIs. 3 | 4 | This package provides a comprehensive interface to Pixverse's video generation 5 | capabilities through the Model Context Protocol (MCP). 6 | """ 7 | 8 | __version__ = "0.2.2" 9 | __author__ = "Pixverse Team" 10 | __email__ = "dev@pixverse.ai" 11 | 12 | from .client import PixverseClient 13 | from .exceptions import ( 14 | PixverseAPIError, 15 | PixverseAuthError, 16 | PixverseError, 17 | PixverseRateLimitError, 18 | PixverseValidationError, 19 | ) 20 | 21 | __all__ = [ 22 | "PixverseClient", 23 | "PixverseError", 24 | "PixverseAPIError", 25 | "PixverseAuthError", 26 | "PixverseRateLimitError", 27 | "PixverseValidationError", 28 | ] 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright 2025 PixVerse. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pydantic models for Pixverse API requests and responses. 3 | """ 4 | 5 | from .common import ( 6 | AspectRatio, 7 | CameraMovement, 8 | FusionType, 9 | ModelVersion, 10 | MotionMode, 11 | VideoQuality, 12 | VideoStyle, 13 | ) 14 | from .requests import ( 15 | ExtendVideoRequest, 16 | FusionVideoRequest, 17 | ImageReference, 18 | ImageToVideoRequest, 19 | LipSyncVideoRequest, 20 | SoundEffectVideoRequest, 21 | TextToVideoRequest, 22 | TransitionVideoRequest, 23 | ) 24 | from .responses import ( 25 | ErrorResponse, 26 | VideoGenerationResponse, 27 | VideoStatus, 28 | ) 29 | 30 | __all__ = [ 31 | # Requests 32 | "TextToVideoRequest", 33 | "ImageToVideoRequest", 34 | "TransitionVideoRequest", 35 | "ExtendVideoRequest", 36 | "LipSyncVideoRequest", 37 | "SoundEffectVideoRequest", 38 | "FusionVideoRequest", 39 | "ImageReference", 40 | # Responses 41 | "VideoGenerationResponse", 42 | "ErrorResponse", 43 | "VideoStatus", 44 | # Common 45 | "ModelVersion", 46 | "VideoQuality", 47 | "MotionMode", 48 | "CameraMovement", 49 | "VideoStyle", 50 | "AspectRatio", 51 | "FusionType", 52 | ] 53 | -------------------------------------------------------------------------------- /src/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Custom exceptions for Pixverse MCP. 3 | """ 4 | 5 | from typing import Any, Dict, Optional 6 | 7 | 8 | class PixverseError(Exception): 9 | """Base exception for all Pixverse-related errors.""" 10 | 11 | def __init__(self, message: str, error_code: Optional[int] = None, details: Optional[Dict[str, Any]] = None): 12 | super().__init__(message) 13 | self.message = message 14 | self.error_code = error_code 15 | self.details = details or {} 16 | 17 | def __str__(self) -> str: 18 | if self.error_code: 19 | return f"[{self.error_code}] {self.message}" 20 | return self.message 21 | 22 | 23 | class PixverseAPIError(PixverseError): 24 | """Raised when the Pixverse API returns an error response.""" 25 | 26 | def __init__( 27 | self, 28 | message: str, 29 | status_code: int, 30 | error_code: Optional[int] = None, 31 | response_data: Optional[Dict[str, Any]] = None, 32 | ): 33 | super().__init__(message, error_code, response_data) 34 | self.status_code = status_code 35 | self.response_data = response_data or {} 36 | 37 | 38 | class PixverseAuthError(PixverseError): 39 | """Raised when authentication fails.""" 40 | 41 | pass 42 | 43 | 44 | class PixverseRateLimitError(PixverseError): 45 | """Raised when rate limit is exceeded.""" 46 | 47 | def __init__(self, message: str = "Rate limit exceeded", retry_after: Optional[int] = None, **kwargs): 48 | super().__init__(message, **kwargs) 49 | self.retry_after = retry_after 50 | 51 | 52 | class PixverseValidationError(PixverseError): 53 | """Raised when request validation fails.""" 54 | 55 | def __init__(self, message: str, field: Optional[str] = None, **kwargs): 56 | super().__init__(message, **kwargs) 57 | self.field = field 58 | 59 | 60 | class PixverseTimeoutError(PixverseError): 61 | """Raised when a request times out.""" 62 | 63 | pass 64 | 65 | 66 | class PixverseConnectionError(PixverseError): 67 | """Raised when connection to Pixverse API fails.""" 68 | 69 | pass 70 | -------------------------------------------------------------------------------- /src/models/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common enums and types used across Pixverse API models. 3 | """ 4 | 5 | from enum import Enum 6 | from typing import Literal 7 | 8 | 9 | class ModelVersion(str, Enum): 10 | """Supported model versions.""" 11 | 12 | V1 = "v1" 13 | V2 = "v2" 14 | V3 = "v3" 15 | V3_5 = "v3.5" 16 | V4 = "v4" 17 | V4_5 = "v4.5" 18 | V5 = "v5" 19 | VISIONARY = "visionary" 20 | 21 | 22 | class VideoQuality(str, Enum): 23 | """Video quality options.""" 24 | 25 | Q360P = "360p" 26 | Q540P = "540p" 27 | Q720P = "720p" 28 | Q1080P = "1080p" 29 | 30 | 31 | class MotionMode(str, Enum): 32 | """Motion mode options.""" 33 | 34 | NORMAL = "normal" 35 | FAST = "fast" 36 | 37 | 38 | class CameraMovement(str, Enum): 39 | """Camera movement options.""" 40 | 41 | HORIZONTAL_RIGHT = "horizontal_right" 42 | HORIZONTAL_LEFT = "horizontal_left" 43 | ZOOM_IN = "zoom_in" 44 | ZOOM_OUT = "zoom_out" 45 | VERTICAL_UP = "vertical_up" 46 | VERTICAL_DOWN = "vertical_down" 47 | CRANE_UP = "crane_up" 48 | QUICKLY_ZOOM_IN = "quickly_zoom_in" 49 | QUICKLY_ZOOM_OUT = "quickly_zoom_out" 50 | SMOOTH_ZOOM_IN = "smooth_zoom_in" 51 | CAMERA_ROTATION = "camera_rotation" 52 | ROBO_ARM = "robo_arm" 53 | SUPER_DOLLY_OUT = "super_dolly_out" 54 | WHIP_PAN = "whip_pan" 55 | HITCHCOCK = "hitchcock" 56 | LEFT_FOLLOW = "left_follow" 57 | RIGHT_FOLLOW = "right_follow" 58 | PAN_LEFT = "pan_left" 59 | PAN_RIGHT = "pan_right" 60 | FIX_BG = "fix_bg" 61 | DEFAULT = "default" 62 | 63 | 64 | class VideoStyle(str, Enum): 65 | """Video style options.""" 66 | 67 | ANIME = "anime" 68 | ANIMATION_3D = "3d_animation" 69 | CLAY = "clay" 70 | REALISTIC = "realistic" 71 | COMIC = "comic" 72 | CYBERPUNK = "cyberpunk" 73 | 74 | 75 | class AspectRatio(str, Enum): 76 | """Aspect ratio options.""" 77 | 78 | RATIO_16_9 = "16:9" 79 | RATIO_4_3 = "4:3" 80 | RATIO_1_1 = "1:1" 81 | RATIO_3_4 = "3:4" 82 | RATIO_9_16 = "9:16" 83 | 84 | 85 | class SoundMode(str, Enum): 86 | """Sound effect mode options.""" 87 | 88 | DEFAULT_MUSIC = "default_music" 89 | 90 | 91 | class FusionType(str, Enum): 92 | """Fusion image reference types.""" 93 | 94 | SUBJECT = "subject" 95 | BACKGROUND = "background" 96 | 97 | 98 | # Type aliases for better readability 99 | Duration = Literal[5, 8, 10] 100 | Seed = int 101 | TemplateId = int 102 | ImageId = int 103 | VideoId = int 104 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pixverse-mcp" 3 | version = "0.2.2" 4 | description = "Model Context Protocol server for Pixverse video generation APIs" 5 | authors = [ 6 | {name = "Pixverse Team", email = "dev@pixverse.ai"} 7 | ] 8 | readme = "README.md" 9 | license = {text = "MIT"} 10 | requires-python = ">=3.12" 11 | classifiers = [ 12 | "Development Status :: 4 - Beta", 13 | "Intended Audience :: Developers", 14 | "License :: OSI Approved :: MIT License", 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.12", 17 | ] 18 | dependencies = [ 19 | "mcp[cli]>=1.6.0", 20 | "httpx>=0.24.0", 21 | "pydantic>=2.0.0", 22 | "asyncio-throttle>=1.0.0", 23 | "tenacity>=8.0.0", 24 | "python-dotenv>=1.0.0", 25 | "loguru>=0.7.0", 26 | "pyyaml>=6.0.0", 27 | "fastapi>=0.104.0", 28 | "uvicorn>=0.24.0", 29 | "sse-starlette>=1.6.0", 30 | ] 31 | 32 | [project.optional-dependencies] 33 | dev = [ 34 | "pytest>=7.0.0", 35 | "pytest-asyncio>=0.21.0", 36 | "pytest-cov>=4.0.0", 37 | "black>=23.0.0", 38 | "isort>=5.12.0", 39 | "flake8>=6.0.0", 40 | "mypy>=1.0.0", 41 | "pre-commit>=3.0.0", 42 | ] 43 | 44 | [project.urls] 45 | Homepage = "https://github.com/pixverse/pixverse-mcp" 46 | Repository = "https://github.com/pixverse/pixverse-mcp" 47 | Documentation = "https://pixverse-mcp.readthedocs.io" 48 | Issues = "https://github.com/pixverse/pixverse-mcp/issues" 49 | 50 | [project.scripts] 51 | pixverse-mcp = "pixverse_mcp.__main__:main" 52 | 53 | [build-system] 54 | requires = ["hatchling"] 55 | build-backend = "hatchling.build" 56 | 57 | [tool.hatch.build.targets.wheel] 58 | packages = ["src/pixverse_mcp"] 59 | 60 | [tool.hatch.build.targets.wheel.sources] 61 | "src/pixverse_mcp" = "pixverse_mcp" 62 | 63 | [tool.uvx] 64 | entry-point = "pixverse_mcp.__main__:main" 65 | 66 | [tool.black] 67 | line-length = 88 68 | target-version = ['py38'] 69 | include = '\.pyi?$' 70 | extend-exclude = ''' 71 | /( 72 | # directories 73 | \.eggs 74 | | \.git 75 | | \.hg 76 | | \.mypy_cache 77 | | \.tox 78 | | \.venv 79 | | build 80 | | dist 81 | )/ 82 | ''' 83 | 84 | [tool.isort] 85 | profile = "black" 86 | multi_line_output = 3 87 | line_length = 88 88 | known_first_party = ["pixverse_mcp"] 89 | 90 | [tool.mypy] 91 | python_version = "3.8" 92 | warn_return_any = true 93 | warn_unused_configs = true 94 | disallow_untyped_defs = true 95 | disallow_incomplete_defs = true 96 | check_untyped_defs = true 97 | disallow_untyped_decorators = true 98 | no_implicit_optional = true 99 | warn_redundant_casts = true 100 | warn_unused_ignores = true 101 | warn_no_return = true 102 | warn_unreachable = true 103 | strict_equality = true 104 | 105 | [tool.pytest.ini_options] 106 | testpaths = ["tests"] 107 | python_files = ["test_*.py", "*_test.py"] 108 | python_classes = ["Test*"] 109 | python_functions = ["test_*"] 110 | addopts = [ 111 | "--strict-markers", 112 | "--strict-config", 113 | "--cov=src/pixverse_mcp", 114 | "--cov-report=term-missing", 115 | "--cov-report=html", 116 | "--cov-report=xml", 117 | ] 118 | markers = [ 119 | "unit: Unit tests", 120 | "integration: Integration tests", 121 | "slow: Slow running tests", 122 | ] 123 | 124 | [dependency-groups] 125 | dev = [ 126 | "twine>=6.2.0", 127 | ] 128 | -------------------------------------------------------------------------------- /src/models/responses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pydantic models for Pixverse API responses. 3 | """ 4 | 5 | from enum import Enum 6 | from typing import Any, Dict, Optional 7 | 8 | from pydantic import BaseModel, Field 9 | 10 | 11 | class VideoStatus(str, Enum): 12 | """Video generation status.""" 13 | 14 | PENDING = "pending" 15 | IN_PROGRESS = "in_progress" 16 | COMPLETED = "completed" 17 | FAILED = "failed" 18 | CANCELLED = "cancelled" 19 | 20 | 21 | class VideoGenerationResponse(BaseModel): 22 | """Response model for video generation requests.""" 23 | 24 | video_id: int 25 | status: VideoStatus = VideoStatus.PENDING 26 | 27 | # Fields from GetOpenapiMediaDetailResp (for get_video_result) 28 | id: Optional[int] = None 29 | prompt: Optional[str] = None 30 | negative_prompt: Optional[str] = None 31 | resolution_ratio: Optional[int] = None 32 | url: Optional[str] = None # video URL 33 | size: Optional[int] = None # video file size 34 | seed: Optional[int] = None 35 | style: Optional[str] = None 36 | create_time: Optional[str] = None 37 | modify_time: Optional[str] = None 38 | outputWidth: Optional[int] = None 39 | outputHeight: Optional[int] = None 40 | has_audio: Optional[bool] = None 41 | customer_paths: Optional[Any] = None 42 | 43 | # Convenience properties for backward compatibility 44 | @property 45 | def video_url(self) -> Optional[str]: 46 | return self.url 47 | 48 | @property 49 | def width(self) -> Optional[int]: 50 | return self.outputWidth 51 | 52 | @property 53 | def height(self) -> Optional[int]: 54 | return self.outputHeight 55 | 56 | @property 57 | def file_size(self) -> Optional[int]: 58 | return self.size 59 | 60 | @property 61 | def duration(self) -> Optional[int]: 62 | # Duration is not directly available, but we can estimate from size 63 | return None 64 | 65 | 66 | class ErrorResponse(BaseModel): 67 | """Error response model.""" 68 | 69 | ErrCode: int 70 | ErrMsg: str 71 | details: Optional[Dict[str, Any]] = None 72 | 73 | 74 | class APIResponse(BaseModel): 75 | """Generic API response wrapper.""" 76 | 77 | Resp: Optional[Dict[str, Any]] = None 78 | ErrCode: int = 0 79 | ErrMsg: str = "Success" 80 | 81 | 82 | class LipSyncTTSInfo(BaseModel): 83 | """TTS speaker information for lip sync.""" 84 | 85 | speaker_id: str 86 | name: str 87 | 88 | 89 | class LipSyncTTSListResponse(BaseModel): 90 | """Response for TTS speaker list.""" 91 | 92 | total: int 93 | data: list[LipSyncTTSInfo] 94 | 95 | 96 | class VideoCreditsResponse(BaseModel): 97 | """Response for video credits information.""" 98 | 99 | video_id: int 100 | credit: int 101 | 102 | 103 | class MediaUploadResponse(BaseModel): 104 | """Response for media upload.""" 105 | 106 | media_id: int = Field(alias="media_id") 107 | media_type: str = Field(alias="media_type") 108 | url: str = Field(alias="url") 109 | 110 | class Config: 111 | populate_by_name = True 112 | 113 | 114 | class ImageUploadResponse(BaseModel): 115 | """Response for image upload.""" 116 | 117 | img_id: int = Field(alias="ImgID") 118 | img_url: str = Field(alias="ImgUrl") 119 | 120 | class Config: 121 | populate_by_name = True 122 | -------------------------------------------------------------------------------- /src/utils/validation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Validation utilities for Pixverse API requests. 3 | """ 4 | 5 | from typing import Any, Dict, List, Optional 6 | 7 | from ..exceptions import PixverseValidationError 8 | from ..models.common import ModelVersion, MotionMode, VideoQuality 9 | 10 | 11 | def validate_model_constraints( 12 | model: str, 13 | quality: str, 14 | duration: int, 15 | motion_mode: Optional[str] = None, 16 | ) -> None: 17 | """ 18 | Validate model-specific constraints. 19 | 20 | Args: 21 | model: Model version 22 | quality: Video quality 23 | duration: Video duration 24 | motion_mode: Motion mode 25 | 26 | Raises: 27 | PixverseValidationError: If constraints are violated 28 | """ 29 | # V5 model constraints 30 | if model == ModelVersion.V5: 31 | if motion_mode == MotionMode.FAST: 32 | raise PixverseValidationError("V5 model does not support fast motion mode", field="motion_mode") 33 | 34 | # Quality constraints 35 | if quality == VideoQuality.Q1080P and duration > 5: 36 | raise PixverseValidationError("1080p quality does not support duration > 5 seconds", field="quality") 37 | 38 | # Motion mode constraints 39 | if motion_mode == MotionMode.FAST and duration > 5: 40 | raise PixverseValidationError("Fast motion mode only supports duration <= 5 seconds", field="motion_mode") 41 | 42 | 43 | def validate_request_params(params: Dict[str, Any]) -> List[str]: 44 | """ 45 | Validate request parameters and return list of warnings. 46 | 47 | Args: 48 | params: Request parameters 49 | 50 | Returns: 51 | List of validation warnings 52 | """ 53 | warnings = [] 54 | 55 | # Check for conflicting parameters 56 | if params.get("template_id") and params.get("camera_movement"): 57 | warnings.append("template_id and camera_movement cannot be used together") 58 | 59 | if params.get("img_id") and params.get("img_ids"): 60 | warnings.append("img_id and img_ids cannot be used together") 61 | 62 | if params.get("sound_effect_content") and params.get("sound_mode"): 63 | warnings.append("sound_effect_content and sound_mode cannot be used together") 64 | 65 | if params.get("source_video_id") and params.get("video_media_id"): 66 | warnings.append("source_video_id and video_media_id cannot be used together") 67 | 68 | # Check required parameters for specific scenarios 69 | template_id = params.get("template_id", 0) 70 | if template_id == 0: # Non-template scenario 71 | if not params.get("img_id") and not params.get("img_ids"): 72 | warnings.append("img_id or img_ids required when template_id is not provided") 73 | 74 | return warnings 75 | 76 | 77 | def validate_fusion_prompt(prompt: str, image_references: List[Dict[str, Any]]) -> None: 78 | """ 79 | Validate fusion prompt contains all required references. 80 | 81 | Args: 82 | prompt: Prompt text 83 | image_references: List of image references 84 | 85 | Raises: 86 | PixverseValidationError: If validation fails 87 | """ 88 | ref_names = {ref["ref_name"] for ref in image_references} 89 | 90 | for ref_name in ref_names: 91 | if f"@{ref_name}" not in prompt: 92 | raise PixverseValidationError(f"Reference @{ref_name} not found in prompt", field="prompt") 93 | 94 | # Check for unique ref_names 95 | all_ref_names = [ref["ref_name"] for ref in image_references] 96 | if len(all_ref_names) != len(set(all_ref_names)): 97 | raise PixverseValidationError("All ref_names must be unique", field="image_references") 98 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | -------------------------------------------------------------------------------- /src/utils/helpers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper utilities for Pixverse MCP. 3 | """ 4 | 5 | import uuid 6 | from typing import Any, Dict, Optional 7 | 8 | 9 | def generate_trace_id() -> str: 10 | """Generate a unique trace ID for request tracking.""" 11 | return str(uuid.uuid4()) 12 | 13 | 14 | def format_error_message(error: Exception, context: Optional[str] = None) -> str: 15 | """ 16 | Format error message with context. 17 | 18 | Args: 19 | error: Exception object 20 | context: Optional context information 21 | 22 | Returns: 23 | Formatted error message 24 | """ 25 | base_message = str(error) 26 | 27 | if context: 28 | return f"{context}: {base_message}" 29 | 30 | return base_message 31 | 32 | 33 | def sanitize_prompt(prompt: str) -> str: 34 | """ 35 | Sanitize prompt text for API submission. 36 | 37 | Args: 38 | prompt: Raw prompt text 39 | 40 | Returns: 41 | Sanitized prompt text 42 | """ 43 | # Remove excessive whitespace 44 | prompt = " ".join(prompt.split()) 45 | 46 | # Trim to maximum length 47 | if len(prompt) > 2048: 48 | prompt = prompt[:2045] + "..." 49 | 50 | return prompt 51 | 52 | 53 | def build_request_summary(request_type: str, params: Dict[str, Any]) -> str: 54 | """ 55 | Build a summary string for a request. 56 | 57 | Args: 58 | request_type: Type of request 59 | params: Request parameters 60 | 61 | Returns: 62 | Summary string 63 | """ 64 | summary_parts = [f"Type: {request_type}"] 65 | 66 | # Add key parameters to summary 67 | if "prompt" in params: 68 | prompt = params["prompt"][:50] + "..." if len(params["prompt"]) > 50 else params["prompt"] 69 | summary_parts.append(f"Prompt: {prompt}") 70 | 71 | if "model" in params: 72 | summary_parts.append(f"Model: {params['model']}") 73 | 74 | if "duration" in params: 75 | summary_parts.append(f"Duration: {params['duration']}s") 76 | 77 | if "quality" in params: 78 | summary_parts.append(f"Quality: {params['quality']}") 79 | 80 | if "img_id" in params: 81 | summary_parts.append(f"Image ID: {params['img_id']}") 82 | 83 | if "template_id" in params and params["template_id"]: 84 | summary_parts.append(f"Template: {params['template_id']}") 85 | 86 | return " | ".join(summary_parts) 87 | 88 | 89 | def extract_video_id_from_response(response_data: Dict[str, Any]) -> Optional[int]: 90 | """ 91 | Extract video ID from API response. 92 | 93 | Args: 94 | response_data: API response data 95 | 96 | Returns: 97 | Video ID if found, None otherwise 98 | """ 99 | # Try different possible locations for video_id 100 | resp_data = response_data.get("Resp", {}) 101 | 102 | if isinstance(resp_data, dict): 103 | return resp_data.get("video_id") 104 | 105 | # Fallback to top-level 106 | return response_data.get("video_id") 107 | 108 | 109 | def get_popular_templates() -> Dict[int, str]: 110 | """ 111 | Get mapping of popular template IDs to names. 112 | 113 | Returns: 114 | Dictionary mapping template ID to name 115 | """ 116 | return { 117 | 315446315336768: "Kiss Kiss", 118 | 315447659476032: "Kungfu Club", 119 | 315447659476033: "Earth Zoom", 120 | 316826014376384: "General Effects", 121 | 313555098280384: "App Filter Template", 122 | 321958627120000: "App Filter Template 2", 123 | } 124 | 125 | 126 | def get_recommended_settings(model: str) -> Dict[str, Any]: 127 | """ 128 | Get recommended settings for a model. 129 | 130 | Args: 131 | model: Model version 132 | 133 | Returns: 134 | Dictionary of recommended settings 135 | """ 136 | if model == "v5": 137 | return { 138 | "duration": 5, 139 | "quality": "540p", 140 | "motion_mode": "normal", 141 | "sound_effect_switch": True, 142 | } 143 | elif model in ["v4", "v4.5"]: 144 | return { 145 | "duration": 5, 146 | "quality": "540p", 147 | "motion_mode": "normal", 148 | } 149 | else: 150 | return { 151 | "duration": 5, 152 | "quality": "540p", 153 | } 154 | -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration management for Pixverse MCP. 3 | """ 4 | 5 | import os 6 | import json 7 | import yaml 8 | from pathlib import Path 9 | from typing import Optional, Dict, Any 10 | from pydantic import BaseModel, Field 11 | from loguru import logger 12 | 13 | 14 | class PixverseConfig(BaseModel): 15 | """Pixverse MCP configuration model.""" 16 | 17 | # API Configuration 18 | api_key: Optional[str] = Field(default=None, description="Pixverse API key (must be provided via PIXVERSE_API_KEY environment variable)") 19 | base_url: str = Field(default="https://app-api.pixverse.ai", description="API base URL") 20 | 21 | 22 | class ConfigManager: 23 | """Configuration manager for Pixverse MCP.""" 24 | 25 | def __init__(self, config_path: Optional[str] = None): 26 | self.config_path = config_path 27 | self._config: Optional[PixverseConfig] = None 28 | 29 | def load_config(self, config_path: Optional[str] = None) -> PixverseConfig: 30 | """Load configuration from file or environment variables.""" 31 | if config_path: 32 | self.config_path = config_path 33 | 34 | # Try to load from file first (optional) 35 | if self.config_path and Path(self.config_path).exists(): 36 | config_data = self._load_from_file(self.config_path) 37 | logger.info(f"Configuration file loaded: {self.config_path}") 38 | else: 39 | config_data = {} 40 | if self.config_path: 41 | logger.info(f"Configuration file not found: {self.config_path}, using defaults and environment variables") 42 | else: 43 | logger.info("No configuration file specified, using defaults and environment variables") 44 | 45 | # Override with environment variables 46 | config_data.update(self._load_from_env()) 47 | 48 | # Validate required fields 49 | if not config_data.get("api_key"): 50 | raise ValueError("API key is required. Please set PIXVERSE_API_KEY environment variable in your ~/.cursor/mcp.json configuration.") 51 | 52 | self._config = PixverseConfig(**config_data) 53 | logger.info(f"Configuration loaded successfully") 54 | return self._config 55 | 56 | def _load_from_file(self, config_path: str) -> Dict[str, Any]: 57 | """Load configuration from file.""" 58 | path = Path(config_path) 59 | 60 | try: 61 | with open(path, 'r', encoding='utf-8') as f: 62 | if path.suffix.lower() in ['.yaml', '.yml']: 63 | data = yaml.safe_load(f) 64 | elif path.suffix.lower() == '.json': 65 | data = json.load(f) 66 | else: 67 | raise ValueError(f"Unsupported config file format: {path.suffix}") 68 | 69 | logger.info(f"Configuration loaded from file: {config_path}") 70 | return data or {} 71 | 72 | except Exception as e: 73 | logger.warning(f"Failed to load config file {config_path}: {e}") 74 | return {} 75 | 76 | def _load_from_env(self) -> Dict[str, Any]: 77 | """Load configuration from environment variables. 78 | 79 | Only load user-specific configurations from environment variables. 80 | MCP program configurations should be in config.yaml file. 81 | """ 82 | # Only allow user-specific environment variables 83 | env_mapping = { 84 | "PIXVERSE_API_KEY": "api_key", 85 | # Note: PIXVERSE_BASE_URL is intentionally excluded for normal users 86 | # It should be configured in config.yaml, not by users 87 | } 88 | 89 | # Advanced users can still override program configs with these env vars 90 | advanced_env_mapping = { 91 | "PIXVERSE_BASE_URL": "base_url", 92 | } 93 | 94 | # Combine both mappings for backward compatibility 95 | env_mapping.update(advanced_env_mapping) 96 | 97 | config_data = {} 98 | for env_key, config_key in env_mapping.items(): 99 | value = os.getenv(env_key) 100 | if value is not None: 101 | config_data[config_key] = value 102 | 103 | return config_data 104 | 105 | @property 106 | def config(self) -> PixverseConfig: 107 | """Get current configuration.""" 108 | if self._config is None: 109 | self._config = self.load_config() 110 | return self._config 111 | 112 | def save_config(self, config_path: Optional[str] = None) -> None: 113 | """Save current configuration to file.""" 114 | if not self._config: 115 | raise ValueError("No configuration to save") 116 | 117 | path = config_path or self.config_path 118 | if not path: 119 | raise ValueError("No config path specified") 120 | 121 | path = Path(path) 122 | config_dict = self._config.model_dump() 123 | 124 | try: 125 | with open(path, 'w', encoding='utf-8') as f: 126 | if path.suffix.lower() in ['.yaml', '.yml']: 127 | yaml.safe_dump(config_dict, f, default_flow_style=False, indent=2) 128 | elif path.suffix.lower() == '.json': 129 | json.dump(config_dict, f, indent=2, ensure_ascii=False) 130 | else: 131 | raise ValueError(f"Unsupported config file format: {path.suffix}") 132 | 133 | logger.info(f"Configuration saved to: {path}") 134 | 135 | except Exception as e: 136 | logger.error(f"Failed to save config to {path}: {e}") 137 | raise 138 | 139 | 140 | # Global config manager instance 141 | _config_manager: Optional[ConfigManager] = None 142 | 143 | 144 | def get_config_manager(config_path: Optional[str] = None) -> ConfigManager: 145 | """Get global configuration manager instance.""" 146 | global _config_manager 147 | if _config_manager is None or config_path: 148 | _config_manager = ConfigManager(config_path) 149 | return _config_manager 150 | 151 | 152 | def get_config(config_path: Optional[str] = None) -> PixverseConfig: 153 | """Get configuration instance.""" 154 | manager = get_config_manager(config_path) 155 | return manager.config 156 | -------------------------------------------------------------------------------- /README-CN.md: -------------------------------------------------------------------------------- 1 | # PixVerse MCP 使用指南 2 |
3 | 4 | Webapp 5 | 6 | 7 | API 8 | 9 |
10 | 11 | ## 概述 12 | 13 | PixVerse MCP 是一个允许您通过支持 Model Context Protocol (MCP) 的应用程序(如 Claude 或 Cursor)访问 PixVerse 最新的视频生成模型。从文本生成视频、动画图片、创建过渡、添加唇形同步、音效等等! 14 | 15 | [English Version](https://github.com/PixVerseAI/PixVerse-MCP/blob/main/README.md) 16 | 17 | ## 主要功能 18 | 19 | - **文本转视频**:使用文本提示生成创意视频 20 | - **图片转视频**:将静态图片转换为动态视频 21 | - **视频扩展**:无缝扩展现有视频以创建更长的序列 22 | - **首尾帧**:在不同图片之间创建平滑变形 23 | - **唇形同步**:为说话人头像视频添加逼真的唇形同步(支持TTS或自定义音频) 24 | - **音效生成**:基于视频内容生成情境音效 25 | - **多主体**:将多个主体合成到一个场景中 26 | - **资源管理**:从本地文件或URL上传图片和视频 27 | - **与AI助手协同创作**:与Claude等AI模型协作增强您的创意工作流程 28 | 29 | ## 系统组件 30 | 31 | 该系统的主要组件: 32 | 33 | 1. **UVX MCP 服务器**: 34 | - 基于 Python 的云端服务器 35 | - 直接与 PixVerse API 通信 36 | - 提供完整的视频生成功能 37 | 38 | ## 安装与配置 39 | 40 | ### 前提条件 41 | 42 | 1. **Python 3.10 或更新版本** 43 | 2. **UV/UVX** 44 | 3. **PixVerse API 密钥**:从 [PixVerse Platform](https://platform.pixverse.ai?utm_source=github&utm_medium=readme&utm_campaign=mcp) 获取 (API Credits 需要从[PixVerse Platform](https://platform.pixverse.ai?utm_source=github&utm_medium=readme&utm_campaign=mcp)购买) 45 | 46 | ### 获取依赖 47 | 48 | 1. **Python**: 49 | - 从 [Python 官网](https://www.python.org/downloads/) 下载并安装 50 | - 确保将 Python 添加到系统路径 51 | 52 | 2. **UV/UVX** 53 | - 通过下面方式下载UV / UVX 54 | - mac/linux 55 | ``` 56 | curl -LsSf https://astral.sh/uv/install.sh | sh 57 | ``` 58 | - Windows 59 | ``` 60 | powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" 61 | ``` 62 | 63 | ## 如何使用 MCP Server 64 | 65 | ### 1. 获取 PixVerse API 密钥 66 | 67 | - 访问 [PixVerse Platform](https://platform.pixverse.ai/api-key?utm_source=github&utm_medium=readme&utm_campaign=mcp) 68 | - 注册/登录您的账户 69 | - 在个人设置中创建并复制您的 API 密钥 70 | - 创建方法:[API密钥生成指南](https://docs.platform.pixverse.ai/how-to-get-api-key-882968m0) 71 | 72 | ### 2. 下载必要依赖 73 | 74 | - **Python**:安装 Python 3.10 或更高版本 75 | - **UV/UVX**:安装最新稳定版的 UV & UVX 76 | 77 | ### 3. 配置 MCP 客户端 78 | 79 | - 打开您的 MCP 客户端(如 Claude for Desktop 或 Cursor) 80 | - 找到客户端设置 81 | - 找到并打开 `mcp_config.json` 文件(或相应的配置文件) 82 | - 根据您选择的方式添加配置: 83 | 84 | ```json 85 | { 86 | "mcpServers": { 87 | "PixVerse": { 88 | "command": "uvx", 89 | "args": [ 90 | "pixverse-mcp" 91 | ], 92 | "env": { 93 | "PIXVERSE_API_KEY": "your-api-key-here" 94 | } 95 | } 96 | } 97 | } 98 | ``` 99 | 100 | - 将从platform.pixverse.ai 获取的API key 写到"PIXVERSE_API_KEY" :"xxxx" 上 101 | - 保存配置文件 102 | 103 | ### 4. 重启 MCP 客户端或刷新 MCP 服务器 104 | 105 | - 完全关闭并重新打开您的 MCP 客户端 106 | - 或者,如果客户端支持,使用刷新 MCP 服务器选项 107 | 108 | ## 具体客户端配置 109 | 110 | ### Claude for Desktop 配置 111 | 112 | 1. 打开 Claude 应用 113 | 2. 转到 Claude > 设置 > 开发者 > 编辑配置 114 | 3. 打开 `claude_desktop_config.json` 文件 115 | 4. 添加上述配置并保存 116 | 5. 重启 Claude 应用 117 | - 如果连接成功, 首页不会提示任何error, 且mcp设置亮绿灯 118 | - 如果连接失败, 首页会提示连接失败 119 | 120 | ### Cursor 配置 121 | 122 | 1. 打开 Cursor 应用 123 | 2. 转到设置 > Model Context Protocol 124 | 3. 添加新服务器 125 | 4. 填写服务器详情(与上面 JSON 配置中的信息相同) 126 | 5. 保存并重启 or 刷新mcp server 127 | 128 | ## 使用方法 129 | 130 | ### 文生视频 131 | 132 | 通过 Claude 或 Cursor,您可以使用自然语言描述要生成的视频: 133 | 134 | **基础示例**: 135 | 136 | ``` 137 | 请生成一个关于海洋日落的视频,金色的阳光洒在海面上,波浪轻轻拍打着岸边。 138 | ``` 139 | 140 | **进阶示例(带参数)**: 141 | ``` 142 | 请使用以下参数生成一个城市夜景视频: 143 | 内容:高楼大厦的灯光在夜幕中闪烁,街道上车流形成光线轨迹 144 | 宽高比:16:9 145 | 质量:540p 146 | 时长:5秒 147 | 运动模式:normal 148 | 负面提示词:模糊、抖动、文字 149 | ``` 150 | 151 | 您可以指定以下参数: 152 | * 宽高比(16:9, 4:3, 1:1, 3:4, 9:16) 153 | * 时长(5秒或8秒) 154 | * 质量(360p, 540p, 720p, 1080p) 155 | * 运动模式(正常或快速) 156 | 157 | ### 脚本 + 视频 158 | 159 | 通过详细脚本指导,生成更有结构和故事性的视频内容。 160 | 161 | **场景描述示例**: 162 | 163 | ``` 164 | 请根据以下场景描述生成一段视频: 165 | 场景:清晨的海滩 166 | 太阳刚刚升起,金色的阳光洒在海面上,形成闪烁的光点。 167 | 沙滩上留下一串脚印,延伸到远处。 168 | 海浪轻轻拍打岸边,留下白色的泡沫,然后缓缓退去。 169 | 远处有一艘小船缓缓驶过平静的海面。 170 | 使用16:9的宽高比,质量选择540p,时长5秒。 171 | ``` 172 | 173 | **分镜脚本示例**: 174 | 175 | 可以把特别简单的分镜脚本放进去 176 | ``` 177 | 请根据以下分镜脚本生成视频: 178 | 开始:俯视角度的咖啡杯,热气袅袅上升 179 | 特写:咖啡表面的涟漪和纹理 180 | 过渡:咖啡被搅动,形成漩涡 181 | 结束:杯子旁放着一本打开的书和一副眼镜 182 | 使用1:1的正方形格式,540p质量,使用fast运动模式增强液体效果。 183 | ``` 184 | 185 | 也可以分镜图片脚本 (Claude Desktop 支持) 186 | ``` 187 | 请根据上传的脚本, 帮我生成一段视频 188 | ``` 189 | 190 | ### 一键视频 191 | 192 | 快速生成特定主题或风格的视频,无需复杂描述。 193 | 194 | **主题示例**: 195 | 196 | ``` 197 | 请生成一个未来科技主题的视频,包含霓虹灯效果和全息投影元素。 198 | ``` 199 | 200 | **风格示例**: 201 | 202 | ``` 203 | 请生成一个水彩画风格的花朵绽放视频,色彩要明亮而梦幻。 204 | ``` 205 | 206 | ### 创意 + 视频 207 | 208 | 结合AI的创意能力构思独特的视频概念。 209 | 210 | **风格转换示例**: 211 | 212 | ``` 213 | 这是一张城市建筑照片,请用复古风格重新诠释它,并提供相应的视频生成提示词。 214 | ``` 215 | 216 | **故事引导示例**: 217 | 218 | ``` 219 | 如果这张街道照片是电影开场,接下来会发生什么?请提供视频创意。 220 | ``` 221 | 222 | **情绪创意示例**: 223 | 224 | ``` 225 | 看一下这张森林小径照片,为我构思一个短视频创意,可以是一个微型故事或情绪变化的场景。 226 | ``` 227 | 228 | ## 功能使用指南 229 | 230 | ### 文本转视频 231 | ``` 232 | 生成一个日落海景视频,金色阳光反射在水面上 233 | ``` 234 | **参数示例**: 235 | ``` 236 | 提示词: "雄鹰在日出时翱翔于山峰之上" 237 | 质量: 720p 238 | 时长: 5秒 239 | 模型: v5 240 | 宽高比: 16:9 241 | ``` 242 | 243 | ### 图片转视频 244 | ``` 245 | 1. 上传图片 → 获得img_id 246 | 2. 使用img_id生成动画视频 247 | ``` 248 | **参数示例**: 249 | ``` 250 | 提示词: "角色走过充满发光树木的魔法森林" 251 | img_id: 12345 252 | 质量: 720p 253 | 时长: 5秒 254 | 模型: v5 255 | ``` 256 | 257 | ### 视频扩展 258 | ``` 259 | 使用source_video_id扩展现有视频 260 | ``` 261 | **参数示例**: 262 | ``` 263 | 提示词: "场景继续,角色发现了一个隐藏的洞穴" 264 | source_video_id: 67890 265 | 时长: 5秒 266 | 质量: 720p 267 | 模型: v5 268 | ``` 269 | 270 | ### 场景过渡 271 | ``` 272 | 上传两张图片,创建平滑变形动画 273 | ``` 274 | **参数示例**: 275 | ``` 276 | 提示词: "从阳光海滩转变为暴风雨夜空" 277 | first_frame_img: 11111 278 | last_frame_img: 22222 279 | 时长: 5秒 280 | 质量: 720p 281 | 模型: v5 282 | ``` 283 | 284 | ### 唇形同步 285 | ``` 286 | 视频: 287 | TTS: 选择说话人 + 输入文本 288 | 音频: 上传音频文件 + 视频 289 | ``` 290 | **参数示例**: 291 | ``` 292 | # 方法1: 生成视频 + TTS 293 | source_video_id: 33333 294 | lip_sync_tts_speaker_id: "speaker_001" 295 | lip_sync_tts_content: "欢迎来到我们的精彩视频教程" 296 | 297 | # 方法2: 生成视频 + 自定义音频 298 | source_video_id: 33333 299 | audio_media_id: 44444 300 | 301 | # 方法3: 上传视频 + TTS 302 | video_media_id: 55555 # 先上传您的视频 303 | lip_sync_tts_speaker_id: "speaker_002" 304 | lip_sync_tts_content: "这是自定义旁白" 305 | 306 | # 方法4: 上传视频 + 自定义音频 307 | video_media_id: 55555 # 先上传您的视频 308 | audio_media_id: 44444 # 先上传您的音频 309 | ``` 310 | 311 | ### 音效生成 312 | ``` 313 | 描述音效: "海浪声、海鸥叫声、轻柔的风声" 314 | ``` 315 | **参数示例**: 316 | ``` 317 | # 方法1: 生成视频 + 音效 318 | sound_effect_content: "轻柔的海浪声、海鸥叫声、柔和的风声" 319 | source_video_id: 55555 320 | original_sound_switch: true # 保留原音频 321 | 322 | # 方法2: 上传视频 + 音效 323 | sound_effect_content: "城市交通声、脚步声、城市氛围" 324 | video_media_id: 66666 # 先上传您的视频 325 | original_sound_switch: false # 替换原音频 326 | 327 | # 方法3: 完全替换音频 328 | sound_effect_content: "史诗管弦乐、雷声、戏剧性紧张感" 329 | video_media_id: 77777 # 先上传您的视频 330 | original_sound_switch: false # 用新音频替换 331 | ``` 332 | 333 | ### 融合视频 334 | ``` 335 | 上传多个图片,使用@ref_name引用 336 | 例如: @人物 站在 @城市 前,@无人机 在背景中飞行 337 | ``` 338 | **参数示例**: 339 | ``` 340 | 提示词: "@英雄 站在@城市 前,@无人机 在头顶飞行" 341 | image_references: [ 342 | {type: "subject", img_id: 66666, ref_name: "英雄"}, 343 | {type: "background", img_id: 77777, ref_name: "城市"}, 344 | {type: "subject", img_id: 88888, ref_name: "无人机"} 345 | ] 346 | 时长: 5秒 347 | 模型: v4.5 348 | 质量: 720p 349 | 宽高比: 16:9 350 | ``` 351 | 352 | ### 状态监控 353 | ``` 354 | 每6秒检查一次video_id状态,直到完成 355 | ``` 356 | **参数示例**: 357 | ``` 358 | video_id: 99999 359 | # 每6秒检查一次,直到状态变为"completed"或"failed" 360 | # 典型生成时间: 60-120秒 361 | ``` 362 | **状态**: pending → in_progress → completed/failed 363 | 364 | 365 | 366 | ## 常见问题 367 | 368 | **如何获取 PixVerse API 密钥?** 369 | - 访问 [PixVerse Platform](https://platform.pixverse.ai?utm_source=github&utm_medium=readme&utm_campaign=mcp),注册账户后在API-KEY 中生成 API 密钥。 370 | 371 | **服务器不响应怎么办?** 372 | 1. 检查您的 API 密钥是否有效 373 | 2. 确认配置文件路径是否正确 374 | 3. 查看错误日志(通常在 Claude 或 Cursor 的日志文件夹中) 375 | 376 | **如何获取积分呢?** 377 | - 如尚未在 API 平台完成充值,请先前往充值。直达链接:[PixVerse Platform](https://platform.pixverse.ai?utm_source=github&utm_medium=readme&utm_campaign=mcp) 378 | 379 | **现在支持图生视频、首尾帧功能吗?** 380 | - 目前MCP方式不支持图生视频和首尾帧功能,您可以前往 [PixVerse Platform](https://platform.pixverse.ai?utm_source=github&utm_medium=readme&utm_campaign=mcp) 或 [API文档](https://docs.platform.pixverse.ai) 通过API方式接入这些功能。 381 | 382 | **支持哪些视频格式和尺寸?** 383 | - PixVerse 支持多种视频分辨率(从 360p 到 1080p)和宽高比(从竖屏 9:16 到横屏 16:9)。 384 | - 但我们建议先使用 540p、5 秒的视频进行效果测试。 385 | 386 | **生成的视频在哪里?** 387 | - 生成的视频会通过 URL 链接提供,您可以点击链接查看、下载或分享视频。 388 | 389 | **视频生成需要多长时间?** 390 | - 根据视频复杂度、服务器负载和网络状况,通常需要 30 秒到 2 分钟不等。 391 | 392 | **如果遇到spawn uvx ENOENT 问题,怎么解决?** 393 | - 此类问题主要是安装uvx/uv path导致,可以通过以下方式解决 394 | 395 | Mac/Linux 396 | ``` 397 | sudo cp ./uvx /usr/local/bin 398 | ``` 399 | 400 | Windows 401 | 1. 首先了解安装的 uv/uvx 在哪个位置,在终端输入 402 | ``` 403 | where uvx 404 | ``` 405 | 2. 打开文件管理器,找到uvx/uv文件 406 | 3. 放到 C:\Program Files (x86) , C:\Program Files 407 | 408 | ## 社区与支持 409 | 410 | ### 社区支持 411 | 412 | 加入我们的社区,获取最新更新、分享您的创作、获取帮助或提供反馈: 413 | - [Discord server](https://discord.gg/pixverse) : 加入我们的 Discord 服务器 414 | 415 | ### 技术支持 416 | 417 | 如果您遇到任何问题或需要帮助,请通过以下方式联系我们: 418 | - 电子邮件: [api@pixverse.ai](mailto:api@pixverse.ai) 419 | - 官方网站: [https://platform.pixverse.ai](https://platform.pixverse.ai?utm_source=github&utm_medium=readme&utm_campaign=mcp) 420 | 421 | ## Release Notes 422 | v2.0.0 (最新) 423 | - **新增**: 图片转视频 424 | - **新增**: 视频扩展和首尾帧 425 | - **新增**: 唇形同步和音效生成 426 | - **新增**: 融合视频和资源上传 427 | - **新增**: 实时状态监控 428 | - **改进**: 增强错误处理和并行生成 429 | 430 | v1.0.0 431 | - 通过MCP支持文生视频,获取视频链接 432 | - 与Claude & Cursor结合创造更多玩法 433 | - 支持Cloud Python MCP server 434 | -------------------------------------------------------------------------------- /src/models/requests.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pydantic models for Pixverse API requests. 3 | """ 4 | 5 | from typing import List, Optional, Union 6 | 7 | from pydantic import BaseModel, Field, field_validator, model_validator 8 | 9 | from .common import ( 10 | AspectRatio, 11 | CameraMovement, 12 | Duration, 13 | FusionType, 14 | ImageId, 15 | ModelVersion, 16 | MotionMode, 17 | Seed, 18 | SoundMode, 19 | TemplateId, 20 | VideoId, 21 | VideoQuality, 22 | VideoStyle, 23 | ) 24 | 25 | 26 | class SoundEffectInfo(BaseModel): 27 | """Sound effect configuration.""" 28 | 29 | sound_effect_switch: bool = False 30 | sound_effect_content: Optional[str] = None 31 | sound_mode: Optional[SoundMode] = None 32 | 33 | @model_validator(mode="after") 34 | def validate_sound_content(self): 35 | if self.sound_effect_switch and self.sound_mode and self.sound_effect_content: 36 | raise ValueError("sound_effect_content and sound_mode cannot be used together") 37 | return self 38 | 39 | 40 | class TextToVideoRequest(BaseModel): 41 | """Request model for text-to-video generation.""" 42 | 43 | prompt: str = Field(..., max_length=2048, description="Text prompt for video generation") 44 | model: ModelVersion = Field(ModelVersion.V5, description="Model version to use") 45 | duration: Duration = Field(5, description="Video duration in seconds") 46 | aspect_ratio: AspectRatio = Field(AspectRatio.RATIO_16_9, description="Video aspect ratio") 47 | quality: VideoQuality = Field(VideoQuality.Q540P, description="Video quality") 48 | 49 | # Optional parameters 50 | negative_prompt: Optional[str] = Field(None, max_length=2048) 51 | seed: Optional[Seed] = None 52 | style: Optional[VideoStyle] = None 53 | motion_mode: Optional[MotionMode] = MotionMode.NORMAL 54 | camera_movement: Optional[CameraMovement] = None 55 | template_id: Optional[TemplateId] = None 56 | water_mark: bool = False 57 | play_bgm: bool = False 58 | 59 | # Sound effect 60 | sound_effect_switch: bool = False 61 | sound_effect_content: Optional[str] = Field(None, max_length=2048) 62 | sound_mode: Optional[SoundMode] = None 63 | 64 | @model_validator(mode="after") 65 | def validate_camera_movement_template(self): 66 | if self.camera_movement and self.template_id: 67 | raise ValueError("camera_movement cannot be used with template_id") 68 | return self 69 | 70 | 71 | class ImageToVideoRequest(BaseModel): 72 | """Request model for image-to-video generation.""" 73 | 74 | prompt: str = Field(..., max_length=2048, description="Text prompt for video generation") 75 | model: ModelVersion = Field(..., description="Model version to use") 76 | duration: Duration = Field(..., description="Video duration in seconds") 77 | quality: VideoQuality = Field(..., description="Video quality") 78 | 79 | # Image parameters (one of these is required) 80 | img_id: Optional[ImageId] = None 81 | img_ids: Optional[List[ImageId]] = None 82 | 83 | # Optional parameters 84 | negative_prompt: Optional[str] = Field(None, max_length=2048) 85 | seed: Optional[Seed] = None 86 | style: Optional[VideoStyle] = None 87 | template_id: Optional[TemplateId] = None 88 | motion_mode: Optional[MotionMode] = MotionMode.NORMAL 89 | camera_movement: Optional[CameraMovement] = None 90 | water_mark: bool = False 91 | play_bgm: bool = False 92 | 93 | # Sound effect 94 | sound_effect_switch: bool = False 95 | sound_effect_content: Optional[str] = Field(None, max_length=2048) 96 | sound_mode: Optional[SoundMode] = None 97 | 98 | @model_validator(mode="after") 99 | def validate_image_params(self): 100 | if not self.img_id and not self.img_ids: 101 | if not self.template_id: 102 | raise ValueError("Either img_id or img_ids must be provided when template_id is not set") 103 | if self.img_id and self.img_ids: 104 | raise ValueError("Cannot use both img_id and img_ids") 105 | return self 106 | 107 | 108 | class TransitionVideoRequest(BaseModel): 109 | """Request model for transition video generation (first frame to last frame).""" 110 | 111 | prompt: str = Field(..., max_length=2048, description="Text prompt for video generation") 112 | first_frame_img: ImageId = Field(..., description="First frame image ID") 113 | last_frame_img: ImageId = Field(..., description="Last frame image ID") 114 | model: ModelVersion = Field(..., description="Model version (v3.5+)") 115 | duration: Duration = Field(..., description="Video duration in seconds") 116 | quality: VideoQuality = Field(..., description="Video quality") 117 | 118 | # Optional parameters 119 | negative_prompt: Optional[str] = Field(None, max_length=2048) 120 | seed: Optional[Seed] = None 121 | motion_mode: Optional[MotionMode] = MotionMode.NORMAL 122 | water_mark: bool = False 123 | play_bgm: bool = False 124 | 125 | # Sound effect 126 | sound_effect_switch: bool = False 127 | sound_effect_content: Optional[str] = Field(None, max_length=2048) 128 | sound_mode: Optional[SoundMode] = None 129 | 130 | 131 | class ExtendVideoRequest(BaseModel): 132 | """Request model for video extension.""" 133 | 134 | prompt: str = Field(..., max_length=2048, description="Text prompt for video extension") 135 | model: ModelVersion = Field(..., description="Model version (v3.5+)") 136 | duration: Duration = Field(..., description="Video duration in seconds") 137 | quality: VideoQuality = Field(..., description="Video quality") 138 | 139 | # Video source (one required) 140 | source_video_id: Optional[VideoId] = None 141 | video_media_id: Optional[VideoId] = None 142 | 143 | # Optional parameters 144 | negative_prompt: Optional[str] = Field(None, max_length=2048) 145 | seed: Optional[Seed] = None 146 | motion_mode: Optional[MotionMode] = MotionMode.NORMAL 147 | template_id: Optional[TemplateId] = None 148 | style: Optional[VideoStyle] = None 149 | 150 | @model_validator(mode="after") 151 | def validate_video_source(self): 152 | if not self.source_video_id and not self.video_media_id: 153 | raise ValueError("Either source_video_id or video_media_id must be provided") 154 | if self.source_video_id and self.video_media_id: 155 | raise ValueError("Cannot use both source_video_id and video_media_id") 156 | return self 157 | 158 | 159 | class LipSyncVideoRequest(BaseModel): 160 | """Request model for lip sync video generation.""" 161 | 162 | # Video source (one required) 163 | source_video_id: Optional[VideoId] = None 164 | video_media_id: Optional[VideoId] = None 165 | 166 | # Audio source (one group required) 167 | audio_media_id: Optional[int] = None 168 | lip_sync_tts_speaker_id: Optional[str] = None 169 | lip_sync_tts_content: Optional[str] = Field(None, max_length=200) 170 | 171 | @model_validator(mode="after") 172 | def validate_audio_source(self): 173 | # Validate video source 174 | if not self.source_video_id and not self.video_media_id: 175 | raise ValueError("Either source_video_id or video_media_id must be provided") 176 | if self.source_video_id and self.video_media_id: 177 | raise ValueError("Cannot use both source_video_id and video_media_id") 178 | 179 | # Validate audio source 180 | if not self.audio_media_id and not (self.lip_sync_tts_speaker_id and self.lip_sync_tts_content): 181 | raise ValueError( 182 | "Either audio_media_id or (lip_sync_tts_speaker_id + lip_sync_tts_content) must be provided" 183 | ) 184 | if self.audio_media_id and (self.lip_sync_tts_speaker_id or self.lip_sync_tts_content): 185 | raise ValueError("Cannot use audio_media_id with TTS parameters") 186 | 187 | return self 188 | 189 | 190 | class SoundEffectVideoRequest(BaseModel): 191 | """Request model for sound effect video generation.""" 192 | 193 | sound_effect_content: str = Field(..., max_length=2048, description="Sound effect description") 194 | 195 | # Video source (one required) 196 | source_video_id: Optional[VideoId] = None 197 | video_media_id: Optional[VideoId] = None 198 | 199 | # Optional parameters 200 | original_sound_switch: bool = False 201 | seed: Optional[Seed] = None 202 | 203 | @model_validator(mode="after") 204 | def validate_video_source_sound(self): 205 | if not self.source_video_id and not self.video_media_id: 206 | raise ValueError("Either source_video_id or video_media_id must be provided") 207 | if self.source_video_id and self.video_media_id: 208 | raise ValueError("Cannot use both source_video_id and video_media_id") 209 | return self 210 | 211 | 212 | class ImageReference(BaseModel): 213 | """Image reference for fusion video generation.""" 214 | 215 | type: FusionType = Field(..., description="Reference type") 216 | img_id: ImageId = Field(..., description="Image ID") 217 | ref_name: str = Field(..., max_length=30, description="Reference name for prompt") 218 | 219 | @field_validator("ref_name") 220 | @classmethod 221 | def validate_ref_name(cls, v): 222 | import re 223 | 224 | if not re.match(r"^[a-zA-Z0-9_ ]+$", v): 225 | raise ValueError("ref_name must contain only alphanumeric characters, underscores, and spaces") 226 | return v 227 | 228 | 229 | class FusionVideoRequest(BaseModel): 230 | """Request model for fusion video generation (multi-subject).""" 231 | 232 | image_references: List[ImageReference] = Field(..., min_length=1, max_length=3) 233 | prompt: str = Field(..., max_length=2048, description="Text prompt with @ref_name references") 234 | model: ModelVersion = Field(ModelVersion.V4_5, description="Model version (only v4.5 supported)") 235 | duration: Duration = Field(..., description="Video duration in seconds") 236 | quality: VideoQuality = Field(..., description="Video quality") 237 | aspect_ratio: AspectRatio = Field(..., description="Video aspect ratio") 238 | 239 | # Optional parameters 240 | negative_prompt: Optional[str] = Field(None, max_length=2048) 241 | seed: Optional[Seed] = None 242 | 243 | @field_validator("model") 244 | @classmethod 245 | def validate_model(cls, v): 246 | if v != ModelVersion.V4_5: 247 | raise ValueError("Fusion generation only supports v4.5 model") 248 | return v 249 | 250 | @model_validator(mode="after") 251 | def validate_prompt_references(self): 252 | if not self.image_references: 253 | return self 254 | 255 | # Check that all ref_names in image_references are used in prompt 256 | ref_names = {ref.ref_name for ref in self.image_references} 257 | for ref_name in ref_names: 258 | if f"@{ref_name}" not in self.prompt: 259 | raise ValueError(f"Reference @{ref_name} not found in prompt") 260 | 261 | return self 262 | 263 | @field_validator("image_references") 264 | @classmethod 265 | def validate_unique_ref_names(cls, v): 266 | ref_names = [ref.ref_name for ref in v] 267 | if len(ref_names) != len(set(ref_names)): 268 | raise ValueError("All ref_names must be unique") 269 | return v 270 | -------------------------------------------------------------------------------- /src/sse_server.py: -------------------------------------------------------------------------------- 1 | """ 2 | SSE (Server-Sent Events) server implementation for Pixverse MCP. 3 | """ 4 | 5 | import asyncio 6 | import json 7 | import uuid 8 | from typing import Any, Dict, Optional, Set 9 | from contextlib import asynccontextmanager 10 | 11 | from fastapi import FastAPI, Request, HTTPException 12 | from fastapi.responses import StreamingResponse 13 | from sse_starlette.sse import EventSourceResponse 14 | from loguru import logger 15 | 16 | from .server import PixverseMCPServer 17 | from .client import PixverseClient 18 | from .models.responses import VideoStatus 19 | 20 | 21 | class PixverseSSEServer: 22 | """SSE server for Pixverse MCP with real-time notifications.""" 23 | 24 | def __init__(self, config_path: Optional[str] = None): 25 | self.config_path = config_path 26 | self.mcp_server = PixverseMCPServer(config_path) 27 | self.clients: Dict[str, asyncio.Queue] = {} 28 | self.video_tasks: Dict[int, str] = {} # video_id -> client_id 29 | 30 | async def initialize(self): 31 | """Initialize the MCP server.""" 32 | await self.mcp_server.initialize() 33 | 34 | async def add_client(self, client_id: str) -> asyncio.Queue: 35 | """Add a new SSE client.""" 36 | queue = asyncio.Queue() 37 | self.clients[client_id] = queue 38 | logger.info(f"SSE client connected: {client_id}") 39 | return queue 40 | 41 | async def remove_client(self, client_id: str): 42 | """Remove an SSE client.""" 43 | if client_id in self.clients: 44 | del self.clients[client_id] 45 | logger.info(f"SSE client disconnected: {client_id}") 46 | 47 | async def broadcast_notification(self, event_type: str, data: Dict[str, Any]): 48 | """Broadcast notification to all connected clients.""" 49 | message = { 50 | "type": event_type, 51 | "data": data, 52 | "timestamp": asyncio.get_event_loop().time() 53 | } 54 | 55 | # Send to all clients 56 | for client_id, queue in self.clients.items(): 57 | try: 58 | await queue.put(message) 59 | except Exception as e: 60 | logger.error(f"Failed to send notification to client {client_id}: {e}") 61 | 62 | async def send_to_client(self, client_id: str, event_type: str, data: Dict[str, Any]): 63 | """Send notification to a specific client.""" 64 | if client_id not in self.clients: 65 | return 66 | 67 | message = { 68 | "type": event_type, 69 | "data": data, 70 | "timestamp": asyncio.get_event_loop().time() 71 | } 72 | 73 | try: 74 | await self.clients[client_id].put(message) 75 | except Exception as e: 76 | logger.error(f"Failed to send notification to client {client_id}: {e}") 77 | 78 | async def monitor_video_progress(self, video_id: int, client_id: str): 79 | """Monitor video generation progress and send updates.""" 80 | try: 81 | self.video_tasks[video_id] = client_id 82 | 83 | # Send initial notification 84 | await self.send_to_client(client_id, "video_started", { 85 | "video_id": video_id, 86 | "status": "pending", 87 | "message": "Video generation started" 88 | }) 89 | 90 | # Monitor progress 91 | max_attempts = 120 # 20 minutes max 92 | attempt = 0 93 | 94 | while attempt < max_attempts: 95 | attempt += 1 96 | 97 | try: 98 | result = await self.mcp_server.client.get_video_result(video_id) 99 | 100 | # Send progress update 101 | await self.send_to_client(client_id, "video_progress", { 102 | "video_id": video_id, 103 | "status": result.status.value if hasattr(result.status, 'value') else str(result.status), 104 | "attempt": attempt, 105 | "max_attempts": max_attempts 106 | }) 107 | 108 | if result.status.name == "COMPLETED": 109 | await self.send_to_client(client_id, "video_completed", { 110 | "video_id": video_id, 111 | "status": "completed", 112 | "video_url": result.video_url, 113 | "output_width": result.outputWidth, 114 | "output_height": result.outputHeight, 115 | "size": result.size, 116 | "has_audio": result.has_audio 117 | }) 118 | break 119 | elif result.status.name == "FAILED": 120 | await self.send_to_client(client_id, "video_failed", { 121 | "video_id": video_id, 122 | "status": "failed", 123 | "message": "Video generation failed" 124 | }) 125 | break 126 | 127 | # Wait before next check 128 | await asyncio.sleep(10) 129 | 130 | except Exception as e: 131 | logger.error(f"Error monitoring video {video_id}: {e}") 132 | await self.send_to_client(client_id, "video_error", { 133 | "video_id": video_id, 134 | "status": "error", 135 | "message": str(e) 136 | }) 137 | break 138 | 139 | if attempt >= max_attempts: 140 | await self.send_to_client(client_id, "video_timeout", { 141 | "video_id": video_id, 142 | "status": "timeout", 143 | "message": "Video generation timed out" 144 | }) 145 | 146 | finally: 147 | # Clean up 148 | if video_id in self.video_tasks: 149 | del self.video_tasks[video_id] 150 | 151 | 152 | def create_sse_app(config_path: Optional[str] = None) -> FastAPI: 153 | """Create FastAPI app with SSE support.""" 154 | 155 | sse_server = PixverseSSEServer(config_path) 156 | 157 | @asynccontextmanager 158 | async def lifespan(app: FastAPI): 159 | # Startup 160 | await sse_server.initialize() 161 | logger.info("🚀 Pixverse SSE Server started") 162 | yield 163 | # Shutdown 164 | logger.info("👋 Pixverse SSE Server stopped") 165 | 166 | app = FastAPI( 167 | title="Pixverse MCP SSE Server", 168 | description="Server-Sent Events interface for Pixverse video generation", 169 | version="1.0.0", 170 | lifespan=lifespan 171 | ) 172 | 173 | @app.get("/") 174 | async def root(): 175 | """Root endpoint with server info.""" 176 | return { 177 | "name": "Pixverse MCP SSE Server", 178 | "version": "1.0.0", 179 | "endpoints": { 180 | "sse": "/events", 181 | "generate_text_video": "/api/text-to-video", 182 | "generate_image_video": "/api/image-to-video", 183 | "upload_image": "/api/upload-image", 184 | "video_status": "/api/video/{video_id}/status" 185 | } 186 | } 187 | 188 | @app.get("/events") 189 | async def sse_endpoint(request: Request): 190 | """SSE endpoint for real-time notifications.""" 191 | client_id = str(uuid.uuid4()) 192 | 193 | async def event_generator(): 194 | queue = await sse_server.add_client(client_id) 195 | 196 | try: 197 | # Send welcome message 198 | yield { 199 | "event": "connected", 200 | "data": json.dumps({ 201 | "client_id": client_id, 202 | "message": "Connected to Pixverse SSE server" 203 | }) 204 | } 205 | 206 | # Stream events 207 | while True: 208 | try: 209 | # Wait for message with timeout 210 | message = await asyncio.wait_for(queue.get(), timeout=30.0) 211 | yield { 212 | "event": message["type"], 213 | "data": json.dumps(message["data"]) 214 | } 215 | except asyncio.TimeoutError: 216 | # Send heartbeat 217 | yield { 218 | "event": "heartbeat", 219 | "data": json.dumps({"timestamp": asyncio.get_event_loop().time()}) 220 | } 221 | 222 | except Exception as e: 223 | logger.error(f"SSE stream error for client {client_id}: {e}") 224 | finally: 225 | await sse_server.remove_client(client_id) 226 | 227 | return EventSourceResponse(event_generator()) 228 | 229 | @app.post("/api/text-to-video") 230 | async def generate_text_video(request: Request): 231 | """Generate video from text with SSE notifications.""" 232 | try: 233 | data = await request.json() 234 | client_id = request.headers.get("X-Client-ID") 235 | 236 | if not client_id: 237 | raise HTTPException(status_code=400, detail="X-Client-ID header required") 238 | 239 | # Call MCP server 240 | result = await sse_server.mcp_server.call_tool("text_to_video", data) 241 | 242 | if "video_id" in result.content[0].text: 243 | video_data = json.loads(result.content[0].text) 244 | video_id = video_data["video_id"] 245 | 246 | # Start monitoring in background 247 | asyncio.create_task(sse_server.monitor_video_progress(video_id, client_id)) 248 | 249 | return {"video_id": video_id, "status": "started"} 250 | else: 251 | raise HTTPException(status_code=500, detail="Failed to start video generation") 252 | 253 | except Exception as e: 254 | logger.error(f"Text-to-video generation error: {e}") 255 | raise HTTPException(status_code=500, detail=str(e)) 256 | 257 | @app.post("/api/image-to-video") 258 | async def generate_image_video(request: Request): 259 | """Generate video from image with SSE notifications.""" 260 | try: 261 | data = await request.json() 262 | client_id = request.headers.get("X-Client-ID") 263 | 264 | if not client_id: 265 | raise HTTPException(status_code=400, detail="X-Client-ID header required") 266 | 267 | # Call MCP server 268 | result = await sse_server.mcp_server.call_tool("image_to_video", data) 269 | 270 | if "video_id" in result.content[0].text: 271 | video_data = json.loads(result.content[0].text) 272 | video_id = video_data["video_id"] 273 | 274 | # Start monitoring in background 275 | asyncio.create_task(sse_server.monitor_video_progress(video_id, client_id)) 276 | 277 | return {"video_id": video_id, "status": "started"} 278 | else: 279 | raise HTTPException(status_code=500, detail="Failed to start video generation") 280 | 281 | except Exception as e: 282 | logger.error(f"Image-to-video generation error: {e}") 283 | raise HTTPException(status_code=500, detail=str(e)) 284 | 285 | @app.get("/api/video/{video_id}/status") 286 | async def get_video_status(video_id: int): 287 | """Get current video status.""" 288 | try: 289 | result = await sse_server.mcp_server.client.get_video_result(video_id) 290 | return { 291 | "video_id": video_id, 292 | "status": result.status.value if hasattr(result.status, 'value') else str(result.status), 293 | "video_url": result.video_url if hasattr(result, 'video_url') else None, 294 | "output_width": result.outputWidth if hasattr(result, 'outputWidth') else None, 295 | "output_height": result.outputHeight if hasattr(result, 'outputHeight') else None, 296 | "size": result.size if hasattr(result, 'size') else None, 297 | "has_audio": result.has_audio if hasattr(result, 'has_audio') else None 298 | } 299 | except Exception as e: 300 | logger.error(f"Get video status error: {e}") 301 | raise HTTPException(status_code=500, detail=str(e)) 302 | 303 | return app 304 | 305 | 306 | async def run_sse_server(config_path: Optional[str] = None, host: str = "0.0.0.0", port: int = 8080): 307 | """Run the SSE server.""" 308 | import uvicorn 309 | 310 | app = create_sse_app(config_path) 311 | 312 | config = uvicorn.Config( 313 | app=app, 314 | host=host, 315 | port=port, 316 | log_level="info", 317 | access_log=True 318 | ) 319 | 320 | server = uvicorn.Server(config) 321 | await server.serve() 322 | -------------------------------------------------------------------------------- /src/client/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base HTTP client for Pixverse API. 3 | """ 4 | 5 | import asyncio 6 | import uuid 7 | from typing import Any, Dict, Optional, Union 8 | from urllib.parse import urljoin 9 | 10 | import httpx 11 | from loguru import logger 12 | from tenacity import retry, stop_after_attempt, wait_exponential 13 | 14 | from ..exceptions import ( 15 | PixverseAPIError, 16 | PixverseAuthError, 17 | PixverseConnectionError, 18 | PixverseRateLimitError, 19 | PixverseTimeoutError, 20 | ) 21 | from ..models.responses import APIResponse, ErrorResponse 22 | 23 | 24 | class BaseClient: 25 | """Base HTTP client for Pixverse API interactions.""" 26 | 27 | def __init__( 28 | self, 29 | api_key: str, 30 | base_url: str = "https://app-api.pixverseai.cn", 31 | timeout: float = 30.0, 32 | max_retries: int = 3, 33 | rate_limit_per_minute: int = 60, 34 | ): 35 | """ 36 | Initialize the base client. 37 | 38 | Args: 39 | api_key: Pixverse API key 40 | base_url: Base URL for the API 41 | timeout: Request timeout in seconds 42 | max_retries: Maximum number of retries for failed requests 43 | rate_limit_per_minute: Rate limit per minute 44 | """ 45 | self.api_key = api_key 46 | self.base_url = base_url.rstrip("/") 47 | self.timeout = timeout 48 | self.max_retries = max_retries 49 | 50 | # Create HTTP client with default headers 51 | self._client = httpx.AsyncClient( 52 | timeout=httpx.Timeout(timeout), 53 | headers=self._get_default_headers(), 54 | ) 55 | 56 | # Rate limiting 57 | self._rate_limiter = asyncio.Semaphore(rate_limit_per_minute) 58 | self._last_request_time = 0.0 59 | 60 | def _get_default_headers(self) -> Dict[str, str]: 61 | """Get default headers for API requests.""" 62 | return { 63 | "API-KEY": self.api_key, 64 | "Accept": "application/json", 65 | "User-Agent": "pixverse-mcp/0.1.0", 66 | } 67 | 68 | def _generate_trace_id(self) -> str: 69 | """Generate a unique trace ID for request tracking.""" 70 | return str(uuid.uuid4()) 71 | 72 | async def _handle_rate_limit(self) -> None: 73 | """Handle rate limiting.""" 74 | async with self._rate_limiter: 75 | current_time = asyncio.get_event_loop().time() 76 | time_since_last = current_time - self._last_request_time 77 | 78 | # Ensure minimum interval between requests (1 second) 79 | min_interval = 60.0 / 60 # 1 request per second 80 | if time_since_last < min_interval: 81 | await asyncio.sleep(min_interval - time_since_last) 82 | 83 | self._last_request_time = asyncio.get_event_loop().time() 84 | 85 | @retry( 86 | stop=stop_after_attempt(3), 87 | wait=wait_exponential(multiplier=1, min=4, max=10), 88 | reraise=True, 89 | ) 90 | async def _make_request( 91 | self, 92 | method: str, 93 | endpoint: str, 94 | data: Optional[Dict[str, Any]] = None, 95 | params: Optional[Dict[str, Any]] = None, 96 | headers: Optional[Dict[str, str]] = None, 97 | use_form_data: bool = False, 98 | ) -> Dict[str, Any]: 99 | """ 100 | Make an HTTP request to the API. 101 | 102 | Args: 103 | method: HTTP method (GET, POST, etc.) 104 | endpoint: API endpoint (relative to base_url) 105 | data: Request body data 106 | params: Query parameters 107 | headers: Additional headers 108 | 109 | Returns: 110 | Response data as dictionary 111 | 112 | Raises: 113 | PixverseAPIError: For API errors 114 | PixverseAuthError: For authentication errors 115 | PixverseRateLimitError: For rate limit errors 116 | PixverseTimeoutError: For timeout errors 117 | PixverseConnectionError: For connection errors 118 | """ 119 | await self._handle_rate_limit() 120 | 121 | # Prepare request 122 | url = urljoin(self.base_url, endpoint.lstrip("/")) 123 | request_headers = self._get_default_headers() 124 | request_headers["Ai-Trace-Id"] = self._generate_trace_id() 125 | 126 | # Set Content-Type for JSON requests (but not for form data) 127 | if data is not None and not use_form_data: 128 | request_headers["Content-Type"] = "application/json" 129 | 130 | if headers: 131 | request_headers.update(headers) 132 | 133 | logger.debug(f"Making {method} request to {url}") 134 | 135 | try: 136 | if use_form_data and data: 137 | # Send as form data 138 | response = await self._client.request( 139 | method=method, 140 | url=url, 141 | data=data, 142 | params=params, 143 | headers=request_headers, 144 | ) 145 | else: 146 | # Send as JSON 147 | response = await self._client.request( 148 | method=method, 149 | url=url, 150 | json=data, 151 | params=params, 152 | headers=request_headers, 153 | ) 154 | 155 | # Handle different response status codes 156 | if response.status_code == 200: 157 | response_data = response.json() 158 | 159 | # Check for API-level errors in response 160 | if isinstance(response_data, dict): 161 | err_code = response_data.get("ErrCode", 0) 162 | if err_code != 0: 163 | err_msg = response_data.get("ErrMsg", "Unknown error") 164 | 165 | # Handle specific error codes 166 | if err_code in [10001, 10002, 10003, 10004, 10005]: 167 | raise PixverseAuthError(err_msg, error_code=err_code) 168 | else: 169 | raise PixverseAPIError( 170 | err_msg, 171 | status_code=response.status_code, 172 | error_code=err_code, 173 | response_data=response_data, 174 | ) 175 | 176 | return response_data 177 | 178 | elif response.status_code == 401: 179 | raise PixverseAuthError("Authentication failed") 180 | elif response.status_code == 403: 181 | raise PixverseAuthError("Access forbidden") 182 | elif response.status_code == 429: 183 | retry_after = response.headers.get("Retry-After") 184 | raise PixverseRateLimitError( 185 | "Rate limit exceeded", 186 | retry_after=int(retry_after) if retry_after else None, 187 | ) 188 | else: 189 | # Try to parse error response 190 | try: 191 | error_data = response.json() 192 | error_msg = error_data.get("ErrMsg", f"HTTP {response.status_code}") 193 | except: 194 | error_msg = f"HTTP {response.status_code}: {response.text}" 195 | 196 | raise PixverseAPIError( 197 | error_msg, 198 | status_code=response.status_code, 199 | response_data=error_data if "error_data" in locals() else None, 200 | ) 201 | 202 | except httpx.TimeoutException as e: 203 | raise PixverseTimeoutError(f"Request timeout: {e}") 204 | except httpx.ConnectError as e: 205 | raise PixverseConnectionError(f"Connection error: {e}") 206 | except httpx.HTTPError as e: 207 | raise PixverseConnectionError(f"HTTP error: {e}") 208 | 209 | async def get( 210 | self, 211 | endpoint: str, 212 | params: Optional[Dict[str, Any]] = None, 213 | headers: Optional[Dict[str, str]] = None, 214 | ) -> Dict[str, Any]: 215 | """Make a GET request.""" 216 | return await self._make_request("GET", endpoint, params=params, headers=headers) 217 | 218 | async def post( 219 | self, 220 | endpoint: str, 221 | data: Optional[Dict[str, Any]] = None, 222 | headers: Optional[Dict[str, str]] = None, 223 | use_form_data: bool = False, 224 | ) -> Dict[str, Any]: 225 | """Make a POST request.""" 226 | return await self._make_request("POST", endpoint, data=data, headers=headers, use_form_data=use_form_data) 227 | 228 | async def put( 229 | self, 230 | endpoint: str, 231 | data: Optional[Dict[str, Any]] = None, 232 | headers: Optional[Dict[str, str]] = None, 233 | ) -> Dict[str, Any]: 234 | """Make a PUT request.""" 235 | return await self._make_request("PUT", endpoint, data=data, headers=headers) 236 | 237 | async def delete( 238 | self, 239 | endpoint: str, 240 | headers: Optional[Dict[str, str]] = None, 241 | ) -> Dict[str, Any]: 242 | """Make a DELETE request.""" 243 | return await self._make_request("DELETE", endpoint, headers=headers) 244 | 245 | async def upload_file( 246 | self, 247 | endpoint: str, 248 | file_path: str, 249 | field_name: str = "file", 250 | additional_data: Optional[Dict[str, Any]] = None, 251 | headers: Optional[Dict[str, str]] = None, 252 | ) -> Dict[str, Any]: 253 | """ 254 | Upload a file using multipart/form-data. 255 | 256 | Args: 257 | endpoint: API endpoint 258 | file_path: Path to the file to upload 259 | field_name: Form field name for the file 260 | additional_data: Additional form data 261 | headers: Additional headers 262 | 263 | Returns: 264 | Response data as dictionary 265 | """ 266 | import os 267 | from pathlib import Path 268 | 269 | await self._handle_rate_limit() 270 | 271 | # Prepare request 272 | url = urljoin(self.base_url, endpoint.lstrip("/")) 273 | request_headers = self._get_default_headers() 274 | request_headers["Ai-Trace-Id"] = self._generate_trace_id() 275 | 276 | if headers: 277 | request_headers.update(headers) 278 | 279 | logger.debug(f"Uploading file {file_path} to {url}") 280 | 281 | try: 282 | # Prepare file and form data 283 | file_path_obj = Path(file_path) 284 | if not file_path_obj.exists(): 285 | raise FileNotFoundError(f"File not found: {file_path}") 286 | 287 | data = additional_data or {} 288 | 289 | with open(file_path, "rb") as file_handle: 290 | files = { 291 | field_name: ( 292 | file_path_obj.name, 293 | file_handle, 294 | self._get_content_type(file_path_obj.suffix) 295 | ) 296 | } 297 | 298 | response = await self._client.request( 299 | method="POST", 300 | url=url, 301 | files=files, 302 | data=data, 303 | headers=request_headers, 304 | ) 305 | 306 | # Handle response same as _make_request 307 | if response.status_code == 200: 308 | response_data = response.json() 309 | 310 | # Check for API-level errors in response 311 | if isinstance(response_data, dict): 312 | err_code = response_data.get("ErrCode", 0) 313 | if err_code != 0: 314 | err_msg = response_data.get("ErrMsg", "Unknown error") 315 | 316 | # Handle specific error codes 317 | if err_code in [10001, 10002, 10003, 10004, 10005]: 318 | raise PixverseAuthError(err_msg, error_code=err_code) 319 | else: 320 | raise PixverseAPIError( 321 | err_msg, 322 | status_code=response.status_code, 323 | error_code=err_code, 324 | response_data=response_data, 325 | ) 326 | 327 | return response_data 328 | 329 | elif response.status_code == 401: 330 | raise PixverseAuthError("Authentication failed") 331 | elif response.status_code == 403: 332 | raise PixverseAuthError("Access forbidden") 333 | elif response.status_code == 429: 334 | retry_after = response.headers.get("Retry-After") 335 | raise PixverseRateLimitError( 336 | "Rate limit exceeded", 337 | retry_after=int(retry_after) if retry_after else None, 338 | ) 339 | else: 340 | # Try to parse error response 341 | try: 342 | error_data = response.json() 343 | error_msg = error_data.get("ErrMsg", f"HTTP {response.status_code}") 344 | except: 345 | error_msg = f"HTTP {response.status_code}: {response.text}" 346 | 347 | raise PixverseAPIError( 348 | error_msg, 349 | status_code=response.status_code, 350 | response_data=error_data if "error_data" in locals() else None, 351 | ) 352 | 353 | except httpx.TimeoutException as e: 354 | raise PixverseTimeoutError(f"Request timeout: {e}") 355 | except httpx.ConnectError as e: 356 | raise PixverseConnectionError(f"Connection error: {e}") 357 | except httpx.HTTPError as e: 358 | raise PixverseConnectionError(f"HTTP error: {e}") 359 | 360 | def _get_content_type(self, file_extension: str) -> str: 361 | """Get content type based on file extension.""" 362 | content_types = { 363 | ".jpg": "image/jpeg", 364 | ".jpeg": "image/jpeg", 365 | ".png": "image/png", 366 | ".gif": "image/gif", 367 | ".webp": "image/webp", 368 | ".mp4": "video/mp4", 369 | ".avi": "video/avi", 370 | ".mov": "video/quicktime", 371 | ".webm": "video/webm", 372 | ".mp3": "audio/mpeg", 373 | ".wav": "audio/wav", 374 | ".ogg": "audio/ogg", 375 | ".m4a": "audio/mp4", 376 | } 377 | return content_types.get(file_extension.lower(), "application/octet-stream") 378 | 379 | async def close(self) -> None: 380 | """Close the HTTP client.""" 381 | await self._client.aclose() 382 | 383 | async def __aenter__(self): 384 | """Async context manager entry.""" 385 | return self 386 | 387 | async def __aexit__(self, exc_type, exc_val, exc_tb): 388 | """Async context manager exit.""" 389 | await self.close() 390 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PixVerse MCP 2 |
3 | 4 | Webapp 5 | 6 | 7 | API 8 | 9 |
10 | 11 | A comprehensive tool that allows you to access PixVerse's latest video generation models via applications that support the Model Context Protocol (MCP), such as Claude or Cursor. Generate videos from text, animate images, create transitions, add lip sync, sound effects, and much more! 12 | 13 | [中文文档](https://github.com/PixVerseAI/PixVerse-MCP/blob/main/README-CN.md) 14 | 15 | 16 | https://github.com/user-attachments/assets/08ce90b7-2591-4256-aff2-9cc51e156d00 17 | 18 | 19 | ## Overview 20 | 21 | PixVerse MCP is a powerful tool that enables you to access PixVerse's latest video generation models through applications that support the Model Context Protocol (MCP). This integration allows you to generate high-quality videos with advanced features including text-to-video, image-to-video, video extensions, transitions, lip sync, sound effects, and more. 22 | 23 | ## Key Features 24 | 25 | - **Text-to-Video Generation**: Generate creative videos using text prompts 26 | - **Image-to-Video Animation**: Animate static images into dynamic videos 27 | - **Flexible Parameter Control**: Adjust video quality, length, aspect ratio, and more 28 | - **Video Extension**: Extend existing videos seamlessly for longer sequences 29 | - **Scene Transitions**: Create smooth morphing between different images 30 | - **Lip Sync**: Add realistic lip sync to talking head videos with TTS or custom audio 31 | - **Sound Effects**: Generate contextual sound effects based on video content 32 | - **Fusion Video**: Composite multiple subjects into one scene (v4.5 only) 33 | - **Resource Management**: Upload images and videos from local files or URLs 34 | - **Co-Creation with AI Assistants**: Collaborate with AI models like Claude to enhance your creative workflow 35 | 36 | ## System Components 37 | 38 | The system consists of two main components: 39 | 40 | 1. **UVX MCP Server** 41 | - Python-based cloud server 42 | - Communicates directly with the PixVerse API 43 | - Provides full video generation capabilities 44 | 45 | ## Installation & Configuration 46 | 47 | ### Prerequisites 48 | 49 | 1. Python 3.10 or higher 50 | 2. UV/UVX 51 | 3. PixVerse API Key: Obtain from PixVerse Platform (This feature requires API Credits, which must be purchased separately on [PixVerse Platform](https://platform.pixverse.ai?utm_source=github&utm_medium=readme&utm_campaign=mcp) 52 | 53 | 54 | ### Get Dependencies 55 | 56 | 1. **Python**: 57 | - Download and install from the official Python website 58 | - Ensure Python is added to your system path 59 | 60 | 2. **UV/UVX**: 61 | - Install uv and set up our Python project and environment: 62 | 63 | #### Mac/Linux 64 | ``` 65 | curl -LsSf https://astral.sh/uv/install.sh | sh 66 | ``` 67 | 68 | #### Windows 69 | ``` 70 | powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" 71 | ``` 72 | 73 | ## How to Use MCP Server 74 | 75 | ### 1. Get PixVerse API Key 76 | - Visit the [PixVerse Platform](https://platform.pixverse.ai?utm_source=github&utm_medium=readme&utm_campaign=mcp) 77 | - Register or log into your account 78 | - Create and copy your API key from the account settings 79 | - [API key generation guide](https://docs.platform.pixverse.ai/how-to-get-api-key-882968m0) 80 | 81 | ### 2. Download Required Dependencies 82 | - **Python**: Install Python 3.10 or above 83 | - **UV/UVX**: Install the latest stable version of UV & UVX 84 | 85 | ### 3. Configure MCP Client 86 | - Open your MCP client (e.g., Claude for Desktop or Cursor) 87 | - Locate the client settings 88 | - Open mcp_config.json (or relevant config file) 89 | - Add the configuration based on the method you use: 90 | 91 | ```json 92 | { 93 | "mcpServers": { 94 | "PixVerse": { 95 | "command": "uvx", 96 | "args": [ 97 | "pixverse-mcp" 98 | ], 99 | "env": { 100 | "PIXVERSE_API_KEY": "your-api-key-here" 101 | } 102 | } 103 | } 104 | } 105 | ``` 106 | 107 | - Add the API key obtained from platform.pixverse.ai under `"PIXVERSE_API_KEY": "xxxx"` 108 | - Save the config file 109 | 110 | ### 5. Restart MCP Client or Refresh MCP Server 111 | - Fully close and reopen your MCP client 112 | - Or use the "Refresh MCP Server" option if supported 113 | 114 | ## Client-specific Configuration 115 | 116 | ### Claude for Desktop 117 | 118 | 1. Open the Claude application 119 | 2. Navigate to Claude > Settings > Developer > Edit Config 120 | 3. Open the claude_desktop_config.json file 121 | - Windows 122 | - Mac : ~/Library/Application\ Support/Claude/claude_desktop_config.json 123 | 4. Add the configuration above and save 124 | 5. Restart Claude 125 | - If connected successfully: the homepage will not show any error and the MCP status will be green 126 | - If connection fails: an error message will be shown on the homepage 127 | 128 | ### Cursor 129 | 130 | 1. Open the Cursor application 131 | 2. Go to Settings > Model Context Protocol 132 | 3. Add a new server 133 | 4. Fill in the server details as in the JSON config above 134 | 5. Save and restart or refresh the MCP server 135 | 136 | ## Advanced Usage Example 137 | 138 | ### Text-to-Video 139 | 140 | Use natural language prompts via Claude or Cursor to generate videos. 141 | 142 | **Basic Example**: 143 | ``` 144 | Generate a video of a sunset over the ocean. Golden sunlight reflects on the water as waves gently hit the shore. 145 | ``` 146 | 147 | **Advanced Example with Parameters**: 148 | ``` 149 | Generate a night cityscape video with the following parameters: 150 | Content: Skyscraper lights twinkling under the night sky, with car lights forming streaks on the road 151 | Aspect Ratio: 16:9 152 | Quality: 540p 153 | Duration: 5 seconds 154 | Motion Mode: normal 155 | Negative Prompts: blur, shaking, text 156 | ``` 157 | 158 | **Supported Parameters**: 159 | - Aspect Ratio: 16:9, 4:3, 1:1, 3:4, 9:16 160 | - Duration: 5s or 8s 161 | - Quality: 360p, 540p, 720p, 1080p 162 | - Motion Mode: normal or fast 163 | 164 | ### Script + Video 165 | 166 | Use detailed scene descriptions or shot lists to create more structured videos. 167 | 168 | **Scene Description Example**: 169 | ``` 170 | Scene: A beach in the early morning. 171 | The sun is rising, casting golden reflections on the sea. 172 | Footprints stretch across the sand. 173 | Gentle waves leave white foam as they retreat. 174 | A small boat slowly sails across the calm sea in the distance. 175 | Aspect Ratio: 16:9, Quality: 540p, Duration: 5 seconds. 176 | ``` 177 | 178 | **Shot-by-Shot Example**: 179 | ``` 180 | Generate a video based on this storyboard: 181 | - Start: Top-down shot of a coffee cup with steam rising 182 | - Close-up: Ripples and texture on the coffee surface 183 | - Transition: Stirring creates a vortex 184 | - End: An open book and glasses next to the cup 185 | Format: 1:1 square, Quality: 540p, Motion: fast 186 | ``` 187 | - Claude Desktop also supports storyboard image input. 188 | 189 | ### One-Click Video 190 | 191 | Quickly generate videos of specific themes or styles without detailed descriptions. 192 | 193 | **Theme Example**: 194 | ``` 195 | Generate a video with a futuristic technology theme, including neon lights and holographic projections. 196 | ``` 197 | 198 | **Style Example**: 199 | ``` 200 | Generate a watercolor-style video of blooming flowers with bright, dreamy colors. 201 | ``` 202 | 203 | ### Creative + Video 204 | 205 | Combine AI's creativity with video generation. 206 | 207 | **Style Transfer Example**: 208 | ``` 209 | This is a photo of a cityscape. Reinterpret it with a retro style and provide a video prompt. 210 | ``` 211 | 212 | **Story Prompt Example**: 213 | ``` 214 | If this street photo is the opening scene of a movie, what happens next? Provide a short video concept. 215 | ``` 216 | 217 | **Emotional Scene Example**: 218 | ``` 219 | Look at this forest path photo and design a short video concept, either a micro-story or a scene with emotional progression. 220 | ``` 221 | 222 | 223 | ## Feature Usage Gudie 224 | ### Text-to-Video 225 | ``` 226 | Generate a sunset ocean video with golden sunlight reflecting on the water 227 | ``` 228 | **Example with parameters**: 229 | ``` 230 | Prompt: "A majestic eagle soaring over mountain peaks at sunrise" 231 | Quality: 720p 232 | Duration: 5 233 | Model: v5 234 | Aspect Ratio: 16:9 235 | ``` 236 | **Parameters**: Quality(360p-1080p), Duration(5s/8s), Aspect Ratio(16:9/1:1/9:16), model(v4.5/v5) 237 | 238 | ### Image-to-Video 239 | ``` 240 | 1. Upload image → Get img_id 241 | 2. Use img_id to generate animated video 242 | ``` 243 | **Example with parameters**: 244 | ``` 245 | Prompt: "The character walks through a magical forest with glowing trees" 246 | img_id: 12345 247 | Quality: 720p 248 | Duration: 5s 249 | Model: v5 250 | ``` 251 | 252 | ### Video Extension 253 | ``` 254 | Use source_video_id to extend existing video 255 | ``` 256 | **Example with parameters**: 257 | ``` 258 | Prompt: "The scene continues with the character discovering a hidden cave" 259 | source_video_id: 67890 260 | Duration: 5s 261 | Quality: 720p 262 | Model: v5 263 | ``` 264 | 265 | ### Scene Transitions 266 | ``` 267 | Upload two images to create smooth morphing animation 268 | ``` 269 | **Example with parameters**: 270 | ``` 271 | Prompt: "Transform from sunny beach to stormy night sky" 272 | first_frame_img: 11111 273 | last_frame_img: 22222 274 | Duration: 5s 275 | Quality: 720p 276 | Model: v5 277 | ``` 278 | 279 | ### Lip Sync 280 | ``` 281 | Video: 282 | TTS: Choose speaker + input text 283 | Audio: Upload audio file + video 284 | ``` 285 | **Example with parameters**: 286 | ``` 287 | # Method 1: Generated Video + TTS 288 | source_video_id: 33333 289 | lip_sync_tts_speaker_id: "speaker_001" 290 | lip_sync_tts_content: "Welcome to our amazing video tutorial" 291 | 292 | # Method 2: Generated Video + Custom Audio 293 | source_video_id: 33333 294 | audio_media_id: 44444 295 | 296 | # Method 3: Uploaded Video + TTS 297 | video_media_id: 55555 # Upload your video first 298 | lip_sync_tts_speaker_id: "speaker_002" 299 | lip_sync_tts_content: "This is a custom narration" 300 | 301 | # Method 4: Uploaded Video + Custom Audio 302 | video_media_id: 55555 # Upload your video first 303 | audio_media_id: 44444 # Upload your audio first 304 | ``` 305 | 306 | ### Sound Effects 307 | ``` 308 | Describe effects: "Ocean waves, seagull calls, gentle wind" 309 | ``` 310 | **Example with parameters**: 311 | ``` 312 | # Method 1: Generated Video + Sound Effects 313 | sound_effect_content: "Gentle ocean waves, seagull calls, soft wind" 314 | source_video_id: 55555 315 | original_sound_switch: true # Keep original audio 316 | 317 | # Method 2: Uploaded Video + Sound Effects 318 | sound_effect_content: "Urban traffic, footsteps, city ambiance" 319 | video_media_id: 66666 # Upload your video first 320 | original_sound_switch: false # Replace original audio 321 | 322 | # Method 3: Replace Audio Completely 323 | sound_effect_content: "Epic orchestral music, thunder, dramatic tension" 324 | video_media_id: 77777 # Upload your video first 325 | original_sound_switch: false # Replace with new audio 326 | ``` 327 | 328 | ### Fusion Video 329 | ``` 330 | Upload multiple images, use @ref_name references 331 | Example: @person standing in front of @city with @drone flying overhead 332 | ``` 333 | **Example with parameters**: 334 | ``` 335 | Prompt: "@hero standing in front of @city with @drone flying overhead" 336 | image_references: [ 337 | {type: "subject", img_id: 66666, ref_name: "hero"}, 338 | {type: "background", img_id: 77777, ref_name: "city"}, 339 | {type: "subject", img_id: 88888, ref_name: "drone"} 340 | ] 341 | Duration: 5s 342 | Model:v4.5 343 | Quality: 720p 344 | Aspect Ratio: 16:9 345 | ``` 346 | 347 | ### 📊 Status Monitoring 348 | ``` 349 | Check video_id status every 6 seconds until completion 350 | ``` 351 | **Example with parameters**: 352 | ``` 353 | video_id: 99999 354 | # Check every 6 seconds until status becomes "completed" or "failed" 355 | # Typical generation time: 60-120 seconds 356 | ``` 357 | **Status**: pending → in_progress → completed/failed 358 | 359 | 360 | ## FAQ 361 | 362 | **How do I get a PixVerse API key?** 363 | - Register at the PixVerse Platform and generate it under "API-KEY" in your account. 364 | 365 | **What should I do if the server doesn't respond?** 366 | 1. Check whether your API key is valid 367 | 2. Ensure the configuration file path is correct 368 | 3. View error logs (typically in the log folders of Claude or Cursor) 369 | 370 | **Does MCP support image-to-video or keyframe features?** 371 | - Not yet. These features are only available via the PixVerse API. [API Docs](https://docs.platform.pixverse.ai) 372 | 373 | **How to obtain credits?** 374 | - If you haven't topped up on the API platform yet, please do so first. [PixVerse Platform](https://platform.pixverse.ai/billing?utm_source=github&utm_medium=readme&utm_campaign=mcp) 375 | 376 | **What video formats and sizes are supported?** 377 | - PixVerse supports resolutions from 360p to 1080p, and aspect ratios from 9:16 (portrait) to 16:9 (landscape). 378 | - We recommend starting with 540p and 5-second videos to test the output quality. 379 | 380 | **Where can I find the generated video?** 381 | - You will receive a URL link to view, download, or share the video. 382 | 383 | **How long does video generation take?** 384 | - Typically 30 seconds to 2 minutes depending on complexity, server load, and network conditions. 385 | 386 | **What to do if you encounter a spawn uvx ENOENT error?** 387 | - This error is typically caused by incorrect UV/UVX installation paths. You can resolve it as follows: 388 | 389 | For Mac/Linux: 390 | ``` 391 | sudo cp ./uvx /usr/local/bin 392 | ``` 393 | 394 | For Windows: 395 | 1. Identify the installation path of UV/UVX by running the following command in the terminal: 396 | ``` 397 | where uvx 398 | ``` 399 | 2. Open File Explorer and locate the uvx/uv files. 400 | 3. Move the files to one of the following directories: 401 | - C:\Program Files (x86) or C:\Program Files 402 | 403 | ## Community & Support 404 | ### Community 405 | - Join our [Discord server](https://discord.gg/pixverse) to receive updates, share creations, get help, or give feedback. 406 | 407 | ### Technical Support 408 | - Email: api@pixverse.ai 409 | - Website: https://platform.pixverse.ai 410 | 411 | ## Release Notes 412 | v2.0.0 (Latest) 413 | - **NEW**: Image-to-video animation 414 | - **NEW**: Video extension for longer sequences 415 | - **NEW**: Scene transitions between images 416 | - **NEW**: Lip sync with TTS and custom audio 417 | - **NEW**: AI-generated sound effects 418 | - **NEW**: Fusion video for composite scenes 419 | - **NEW**: TTS speaker selection 420 | - **NEW**: Resource upload (images/videos) with file or url 421 | - **NEW**: Real-time status monitoring 422 | - **IMPROVED**: Enhanced error handling and user feedback 423 | - **IMPROVED**: Parallel video generation support 424 | 425 | v1.0.0 426 | - Supports text-to-video generation via MCP 427 | - Enables video link retrieval 428 | - Integrates with Claude and Cursor for enhanced workflows 429 | - Supports Cloud based Python MCP servers 430 | -------------------------------------------------------------------------------- /src/client/pixverse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main Pixverse API client. 3 | """ 4 | 5 | from typing import Any, Dict, List, Optional 6 | 7 | from loguru import logger 8 | 9 | from ..exceptions import PixverseValidationError 10 | from ..models.requests import ( 11 | ExtendVideoRequest, 12 | FusionVideoRequest, 13 | ImageToVideoRequest, 14 | LipSyncVideoRequest, 15 | SoundEffectVideoRequest, 16 | TextToVideoRequest, 17 | TransitionVideoRequest, 18 | ) 19 | from ..models.responses import ( 20 | ImageUploadResponse, 21 | LipSyncTTSListResponse, 22 | MediaUploadResponse, 23 | VideoCreditsResponse, 24 | VideoGenerationResponse, 25 | VideoStatus, 26 | ) 27 | from .base import BaseClient 28 | 29 | 30 | class PixverseClient(BaseClient): 31 | """Main client for Pixverse video generation APIs.""" 32 | 33 | def __init__(self, api_key: str, **kwargs): 34 | """ 35 | Initialize Pixverse client. 36 | 37 | Args: 38 | api_key: Pixverse API key 39 | **kwargs: Additional arguments passed to BaseClient 40 | """ 41 | super().__init__(api_key, **kwargs) 42 | logger.info("Pixverse client initialized") 43 | 44 | async def text_to_video(self, request: TextToVideoRequest) -> VideoGenerationResponse: 45 | """ 46 | Generate video from text prompt. 47 | 48 | Args: 49 | request: Text-to-video request parameters 50 | 51 | Returns: 52 | Video generation response 53 | """ 54 | logger.info(f"Generating video from text: {request.prompt[:50]}...") 55 | 56 | response_data = await self.post( 57 | "/openapi/v2/video/text/generate", 58 | data=request.dict(exclude_none=True), 59 | ) 60 | 61 | # Extract video_id from response 62 | resp_data = response_data.get("Resp", {}) 63 | video_id = resp_data.get("video_id") 64 | 65 | if not video_id: 66 | raise PixverseValidationError("No video_id in response") 67 | 68 | return VideoGenerationResponse(video_id=video_id) 69 | 70 | async def image_to_video(self, request: ImageToVideoRequest) -> VideoGenerationResponse: 71 | """ 72 | Generate video from image. 73 | 74 | Args: 75 | request: Image-to-video request parameters 76 | 77 | Returns: 78 | Video generation response 79 | """ 80 | logger.info(f"Generating video from image: {request.img_id or request.img_ids}") 81 | 82 | response_data = await self.post( 83 | "/openapi/v2/video/img/generate", 84 | data=request.dict(exclude_none=True), 85 | ) 86 | 87 | resp_data = response_data.get("Resp", {}) 88 | video_id = resp_data.get("video_id") 89 | 90 | if not video_id: 91 | raise PixverseValidationError("No video_id in response") 92 | 93 | return VideoGenerationResponse(video_id=video_id) 94 | 95 | async def transition_video(self, request: TransitionVideoRequest) -> VideoGenerationResponse: 96 | """ 97 | Generate transition video between two frames. 98 | 99 | Args: 100 | request: Transition video request parameters 101 | 102 | Returns: 103 | Video generation response 104 | """ 105 | logger.info(f"Generating transition video: {request.first_frame_img} -> {request.last_frame_img}") 106 | 107 | response_data = await self.post( 108 | "/openapi/v2/video/transition/generate", 109 | data=request.dict(exclude_none=True), 110 | ) 111 | 112 | resp_data = response_data.get("Resp", {}) 113 | video_id = resp_data.get("video_id") 114 | 115 | if not video_id: 116 | raise PixverseValidationError("No video_id in response") 117 | 118 | return VideoGenerationResponse(video_id=video_id) 119 | 120 | async def extend_video(self, request: ExtendVideoRequest) -> VideoGenerationResponse: 121 | """ 122 | Extend an existing video. 123 | 124 | Args: 125 | request: Video extension request parameters 126 | 127 | Returns: 128 | Video generation response 129 | """ 130 | logger.info(f"Extending video: {request.source_video_id or request.video_media_id}") 131 | 132 | response_data = await self.post( 133 | "/openapi/v2/video/extend/generate", 134 | data=request.dict(exclude_none=True), 135 | ) 136 | 137 | resp_data = response_data.get("Resp", {}) 138 | video_id = resp_data.get("video_id") 139 | 140 | if not video_id: 141 | raise PixverseValidationError("No video_id in response") 142 | 143 | return VideoGenerationResponse(video_id=video_id) 144 | 145 | async def lip_sync_video(self, request: LipSyncVideoRequest) -> VideoGenerationResponse: 146 | """ 147 | Generate lip sync video. 148 | 149 | Args: 150 | request: Lip sync request parameters 151 | 152 | Returns: 153 | Video generation response 154 | """ 155 | logger.info(f"Generating lip sync video: {request.source_video_id or request.video_media_id}") 156 | 157 | response_data = await self.post( 158 | "/openapi/v2/video/lip_sync/generate", 159 | data=request.dict(exclude_none=True), 160 | ) 161 | 162 | resp_data = response_data.get("Resp", {}) 163 | video_id = resp_data.get("video_id") 164 | 165 | if not video_id: 166 | raise PixverseValidationError("No video_id in response") 167 | 168 | return VideoGenerationResponse(video_id=video_id) 169 | 170 | async def sound_effect_video(self, request: SoundEffectVideoRequest) -> VideoGenerationResponse: 171 | """ 172 | Add sound effects to video. 173 | 174 | Args: 175 | request: Sound effect request parameters 176 | 177 | Returns: 178 | Video generation response 179 | """ 180 | logger.info(f"Adding sound effects to video: {request.source_video_id or request.video_media_id}") 181 | 182 | response_data = await self.post( 183 | "/openapi/v2/video/sound_effect/generate", 184 | data=request.dict(exclude_none=True), 185 | ) 186 | 187 | resp_data = response_data.get("Resp", {}) 188 | video_id = resp_data.get("video_id") 189 | 190 | if not video_id: 191 | raise PixverseValidationError("No video_id in response") 192 | 193 | return VideoGenerationResponse(video_id=video_id) 194 | 195 | async def fusion_video(self, request: FusionVideoRequest) -> VideoGenerationResponse: 196 | """ 197 | Generate fusion video with multiple subjects. 198 | 199 | Args: 200 | request: Fusion video request parameters 201 | 202 | Returns: 203 | Video generation response 204 | """ 205 | logger.info(f"Generating fusion video with {len(request.image_references)} subjects") 206 | 207 | response_data = await self.post( 208 | "/openapi/v2/video/fusion/generate", 209 | data=request.dict(exclude_none=True), 210 | ) 211 | 212 | resp_data = response_data.get("Resp", {}) 213 | video_id = resp_data.get("video_id") 214 | 215 | if not video_id: 216 | raise PixverseValidationError("No video_id in response") 217 | 218 | return VideoGenerationResponse(video_id=video_id) 219 | 220 | async def get_lip_sync_tts_list(self, page_num: int = 1, page_size: int = 30) -> LipSyncTTSListResponse: 221 | """ 222 | Get list of available TTS speakers for lip sync. 223 | 224 | Args: 225 | page_num: Page number (default: 1) 226 | page_size: Page size (default: 30) 227 | 228 | Returns: 229 | TTS speakers list response 230 | """ 231 | logger.info("Getting TTS speakers list") 232 | 233 | params = { 234 | "page_num": page_num, 235 | "page_size": page_size, 236 | } 237 | 238 | response_data = await self.get( 239 | "/openapi/v2/video/lip_sync/tts_list", 240 | params=params, 241 | ) 242 | 243 | resp_data = response_data.get("Resp", {}) 244 | return LipSyncTTSListResponse(**resp_data) 245 | 246 | async def get_video_result(self, video_id: int) -> VideoGenerationResponse: 247 | """ 248 | Get video generation result and status. 249 | 250 | Args: 251 | video_id: Video ID 252 | 253 | Returns: 254 | Video generation response with current status 255 | """ 256 | logger.info(f"Getting result for video: {video_id}") 257 | 258 | response_data = await self.get(f"/openapi/v2/video/result/{video_id}") 259 | 260 | resp_data = response_data.get("Resp", {}) 261 | 262 | # Map Go response fields to our model 263 | # Status mapping: 1=normal(completed), 6=deleted, 7=banned, 8=failed, 2=pending, 3=in_progress 264 | status_map = { 265 | 1: VideoStatus.COMPLETED, 266 | 2: VideoStatus.PENDING, 267 | 3: VideoStatus.IN_PROGRESS, 268 | 6: VideoStatus.CANCELLED, 269 | 7: VideoStatus.FAILED, 270 | 8: VideoStatus.FAILED, 271 | } 272 | 273 | go_status = resp_data.get("status", 2) # Default to pending 274 | mapped_status = status_map.get(go_status, VideoStatus.PENDING) 275 | 276 | return VideoGenerationResponse( 277 | video_id=video_id, 278 | status=mapped_status, 279 | id=resp_data.get("id"), 280 | prompt=resp_data.get("prompt"), 281 | negative_prompt=resp_data.get("negative_prompt"), 282 | resolution_ratio=resp_data.get("resolution_ratio"), 283 | url=resp_data.get("url"), 284 | size=resp_data.get("size"), 285 | seed=resp_data.get("seed"), 286 | style=resp_data.get("style"), 287 | create_time=resp_data.get("create_time"), 288 | modify_time=resp_data.get("modify_time"), 289 | outputWidth=resp_data.get("outputWidth"), 290 | outputHeight=resp_data.get("outputHeight"), 291 | has_audio=resp_data.get("has_audio"), 292 | customer_paths=resp_data.get("customer_paths") 293 | ) 294 | 295 | async def get_video_credits(self, video_id: int) -> VideoCreditsResponse: 296 | """ 297 | Get credits information for a video. 298 | 299 | Args: 300 | video_id: Video ID 301 | 302 | Returns: 303 | Video credits response 304 | """ 305 | logger.info(f"Getting credits for video: {video_id}") 306 | 307 | response_data = await self.get(f"/video/credits/{video_id}") 308 | 309 | resp_data = response_data.get("Resp", {}) 310 | return VideoCreditsResponse(**resp_data) 311 | 312 | async def upload_image(self, file_path: str = None, image_url: str = None) -> ImageUploadResponse: 313 | """ 314 | Upload image file or from URL. 315 | 316 | Args: 317 | file_path: Path to the image file (for multipart upload) 318 | image_url: URL of the image (for form/json upload) 319 | 320 | Returns: 321 | Image upload response with img_id 322 | """ 323 | if not file_path and not image_url: 324 | raise ValueError("Either file_path or image_url must be provided") 325 | 326 | if file_path and image_url: 327 | raise ValueError("Only one of file_path or image_url should be provided") 328 | 329 | if file_path: 330 | # Multipart upload for local file 331 | logger.info(f"Uploading image file: {file_path}") 332 | response_data = await self.upload_file( 333 | "/openapi/v2/image/upload", 334 | file_path=file_path, 335 | field_name="image" 336 | ) 337 | else: 338 | # Form upload for URL 339 | logger.info(f"Uploading image from URL: {image_url}") 340 | response_data = await self.post( 341 | "/openapi/v2/image/upload", 342 | data={"image_url": image_url}, 343 | use_form_data=True 344 | ) 345 | 346 | resp_data = response_data.get("Resp", {}) 347 | return ImageUploadResponse(**resp_data) 348 | 349 | async def upload_media(self, file_path: str = None, file_url: str = None, media_type: str = "video") -> MediaUploadResponse: 350 | """ 351 | Upload media file (video/audio) or from URL. 352 | 353 | Args: 354 | file_path: Path to the media file (for multipart upload) 355 | file_url: URL of the media file (for form/json upload) 356 | media_type: Type of media (video, audio) 357 | 358 | Returns: 359 | Media upload response with media_id 360 | """ 361 | if not file_path and not file_url: 362 | raise ValueError("Either file_path or file_url must be provided") 363 | 364 | if file_path and file_url: 365 | raise ValueError("Only one of file_path or file_url should be provided") 366 | 367 | if file_path: 368 | # Multipart upload for local file 369 | logger.info(f"Uploading {media_type} file: {file_path}") 370 | additional_data = {"media_type": media_type} 371 | response_data = await self.upload_file( 372 | "/openapi/v2/media/upload", 373 | file_path=file_path, 374 | field_name="file", 375 | additional_data=additional_data 376 | ) 377 | else: 378 | # Form upload for URL 379 | logger.info(f"Uploading {media_type} from URL: {file_url}") 380 | response_data = await self.post( 381 | "/openapi/v2/media/upload", 382 | data={"file_url": file_url}, 383 | use_form_data=True 384 | ) 385 | 386 | resp_data = response_data.get("Resp", {}) 387 | return MediaUploadResponse(**resp_data) 388 | 389 | # Convenience methods for common use cases 390 | 391 | async def quick_text_video( 392 | self, 393 | prompt: str, 394 | model: str = "v5", 395 | duration: int = 5, 396 | quality: str = "540p", 397 | aspect_ratio: str = "16:9", 398 | **kwargs, 399 | ) -> VideoGenerationResponse: 400 | """ 401 | Quick text-to-video generation with sensible defaults. 402 | 403 | Args: 404 | prompt: Text prompt 405 | model: Model version (default: v5) 406 | duration: Video duration (default: 5) 407 | quality: Video quality (default: 540p) 408 | aspect_ratio: Aspect ratio (default: 16:9) 409 | **kwargs: Additional parameters 410 | 411 | Returns: 412 | Video generation response 413 | """ 414 | request = TextToVideoRequest( 415 | prompt=prompt, model=model, duration=duration, quality=quality, aspect_ratio=aspect_ratio, **kwargs 416 | ) 417 | return await self.text_to_video(request) 418 | 419 | async def quick_image_video( 420 | self, img_id: int, prompt: str, model: str = "v5", duration: int = 5, quality: str = "540p", **kwargs 421 | ) -> VideoGenerationResponse: 422 | """ 423 | Quick image-to-video generation with sensible defaults. 424 | 425 | Args: 426 | img_id: Image ID 427 | prompt: Text prompt 428 | model: Model version (default: v5) 429 | duration: Video duration (default: 5) 430 | quality: Video quality (default: 540p) 431 | **kwargs: Additional parameters 432 | 433 | Returns: 434 | Video generation response 435 | """ 436 | request = ImageToVideoRequest( 437 | img_id=img_id, prompt=prompt, model=model, duration=duration, quality=quality, **kwargs 438 | ) 439 | return await self.image_to_video(request) 440 | 441 | async def wait_for_video_completion( 442 | self, 443 | video_id: int, 444 | max_wait_time: int = 300, 445 | poll_interval: int = 10, 446 | ) -> VideoGenerationResponse: 447 | """ 448 | Wait for video generation to complete by polling status. 449 | 450 | Args: 451 | video_id: Video ID to monitor 452 | max_wait_time: Maximum wait time in seconds (default: 300) 453 | poll_interval: Polling interval in seconds (default: 10) 454 | 455 | Returns: 456 | Final video generation response 457 | 458 | Raises: 459 | PixverseTimeoutError: If video doesn't complete within max_wait_time 460 | PixverseAPIError: If video generation fails 461 | """ 462 | import asyncio 463 | from ..exceptions import PixverseTimeoutError, PixverseAPIError 464 | 465 | logger.info(f"Waiting for video {video_id} to complete (max {max_wait_time}s)") 466 | 467 | start_time = asyncio.get_event_loop().time() 468 | 469 | while True: 470 | result = await self.get_video_result(video_id) 471 | 472 | if result.status == VideoStatus.COMPLETED: 473 | logger.info(f"Video {video_id} completed successfully") 474 | return result 475 | elif result.status == VideoStatus.FAILED: 476 | error_msg = result.error_message or "Video generation failed" 477 | logger.error(f"Video {video_id} failed: {error_msg}") 478 | raise PixverseAPIError(error_msg) 479 | elif result.status == VideoStatus.CANCELLED: 480 | logger.warning(f"Video {video_id} was cancelled") 481 | raise PixverseAPIError("Video generation was cancelled") 482 | 483 | # Check timeout 484 | elapsed = asyncio.get_event_loop().time() - start_time 485 | if elapsed >= max_wait_time: 486 | logger.error(f"Video {video_id} timed out after {elapsed:.1f}s") 487 | raise PixverseTimeoutError(f"Video generation timed out after {max_wait_time}s") 488 | 489 | logger.debug(f"Video {video_id} status: {result.status}, waiting {poll_interval}s...") 490 | await asyncio.sleep(poll_interval) 491 | 492 | async def upload_and_generate_video( 493 | self, 494 | image_path: str, 495 | prompt: str, 496 | model: str = "v5", 497 | duration: int = 5, 498 | quality: str = "540p", 499 | wait_for_completion: bool = True, 500 | max_wait_time: int = 300, 501 | **kwargs 502 | ) -> VideoGenerationResponse: 503 | """ 504 | Complete workflow: upload image and generate video with auto-polling. 505 | 506 | Args: 507 | image_path: Path to the image file 508 | prompt: Text prompt for video generation 509 | model: Model version (default: v5) 510 | duration: Video duration (default: 5) 511 | quality: Video quality (default: 540p) 512 | wait_for_completion: Whether to wait for completion (default: True) 513 | max_wait_time: Maximum wait time in seconds (default: 300) 514 | **kwargs: Additional parameters for video generation 515 | 516 | Returns: 517 | Final video generation response (if wait_for_completion=True) 518 | or initial response with video_id (if wait_for_completion=False) 519 | """ 520 | logger.info(f"Starting complete workflow: {image_path} -> video") 521 | 522 | # Step 1: Upload image 523 | logger.info("Step 1: Uploading image...") 524 | upload_result = await self.upload_image(image_path) 525 | logger.info(f"Image uploaded successfully, img_id: {upload_result.img_id}") 526 | 527 | # Step 2: Generate video 528 | logger.info("Step 2: Generating video...") 529 | request = ImageToVideoRequest( 530 | img_id=upload_result.img_id, 531 | prompt=prompt, 532 | model=model, 533 | duration=duration, 534 | quality=quality, 535 | **kwargs 536 | ) 537 | 538 | video_response = await self.image_to_video(request) 539 | logger.info(f"Video generation started, video_id: {video_response.video_id}") 540 | 541 | # Step 3: Wait for completion (optional) 542 | if wait_for_completion: 543 | logger.info("Step 3: Waiting for video completion...") 544 | final_result = await self.wait_for_video_completion( 545 | video_response.video_id, 546 | max_wait_time=max_wait_time 547 | ) 548 | logger.info("Complete workflow finished successfully!") 549 | return final_result 550 | else: 551 | return video_response 552 | -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- 1 | version = 1 2 | revision = 1 3 | requires-python = ">=3.12" 4 | 5 | [[package]] 6 | name = "annotated-types" 7 | version = "0.7.0" 8 | source = { registry = "https://pypi.org/simple" } 9 | sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } 10 | wheels = [ 11 | { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, 12 | ] 13 | 14 | [[package]] 15 | name = "anyio" 16 | version = "4.9.0" 17 | source = { registry = "https://pypi.org/simple" } 18 | dependencies = [ 19 | { name = "idna" }, 20 | { name = "sniffio" }, 21 | { name = "typing-extensions", marker = "python_full_version < '3.13'" }, 22 | ] 23 | sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949 } 24 | wheels = [ 25 | { url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916 }, 26 | ] 27 | 28 | [[package]] 29 | name = "certifi" 30 | version = "2025.1.31" 31 | source = { registry = "https://pypi.org/simple" } 32 | sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 } 33 | wheels = [ 34 | { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 }, 35 | ] 36 | 37 | [[package]] 38 | name = "click" 39 | version = "8.1.8" 40 | source = { registry = "https://pypi.org/simple" } 41 | dependencies = [ 42 | { name = "colorama", marker = "sys_platform == 'win32'" }, 43 | ] 44 | sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } 45 | wheels = [ 46 | { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 }, 47 | ] 48 | 49 | [[package]] 50 | name = "colorama" 51 | version = "0.4.6" 52 | source = { registry = "https://pypi.org/simple" } 53 | sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } 54 | wheels = [ 55 | { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, 56 | ] 57 | 58 | [[package]] 59 | name = "h11" 60 | version = "0.14.0" 61 | source = { registry = "https://pypi.org/simple" } 62 | sdist = { url = "https://files.pythonhosted.org/packages/f5/38/3af3d3633a34a3316095b39c8e8fb4853a28a536e55d347bd8d8e9a14b03/h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", size = 100418 } 63 | wheels = [ 64 | { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, 65 | ] 66 | 67 | [[package]] 68 | name = "httpcore" 69 | version = "1.0.8" 70 | source = { registry = "https://pypi.org/simple" } 71 | dependencies = [ 72 | { name = "certifi" }, 73 | { name = "h11" }, 74 | ] 75 | sdist = { url = "https://files.pythonhosted.org/packages/9f/45/ad3e1b4d448f22c0cff4f5692f5ed0666658578e358b8d58a19846048059/httpcore-1.0.8.tar.gz", hash = "sha256:86e94505ed24ea06514883fd44d2bc02d90e77e7979c8eb71b90f41d364a1bad", size = 85385 } 76 | wheels = [ 77 | { url = "https://files.pythonhosted.org/packages/18/8d/f052b1e336bb2c1fc7ed1aaed898aa570c0b61a09707b108979d9fc6e308/httpcore-1.0.8-py3-none-any.whl", hash = "sha256:5254cf149bcb5f75e9d1b2b9f729ea4a4b883d1ad7379fc632b727cec23674be", size = 78732 }, 78 | ] 79 | 80 | [[package]] 81 | name = "httpx" 82 | version = "0.28.1" 83 | source = { registry = "https://pypi.org/simple" } 84 | dependencies = [ 85 | { name = "anyio" }, 86 | { name = "certifi" }, 87 | { name = "httpcore" }, 88 | { name = "idna" }, 89 | ] 90 | sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } 91 | wheels = [ 92 | { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, 93 | ] 94 | 95 | [[package]] 96 | name = "httpx-sse" 97 | version = "0.4.0" 98 | source = { registry = "https://pypi.org/simple" } 99 | sdist = { url = "https://files.pythonhosted.org/packages/4c/60/8f4281fa9bbf3c8034fd54c0e7412e66edbab6bc74c4996bd616f8d0406e/httpx-sse-0.4.0.tar.gz", hash = "sha256:1e81a3a3070ce322add1d3529ed42eb5f70817f45ed6ec915ab753f961139721", size = 12624 } 100 | wheels = [ 101 | { url = "https://files.pythonhosted.org/packages/e1/9b/a181f281f65d776426002f330c31849b86b31fc9d848db62e16f03ff739f/httpx_sse-0.4.0-py3-none-any.whl", hash = "sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f", size = 7819 }, 102 | ] 103 | 104 | [[package]] 105 | name = "idna" 106 | version = "3.10" 107 | source = { registry = "https://pypi.org/simple" } 108 | sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } 109 | wheels = [ 110 | { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, 111 | ] 112 | 113 | [[package]] 114 | name = "markdown-it-py" 115 | version = "3.0.0" 116 | source = { registry = "https://pypi.org/simple" } 117 | dependencies = [ 118 | { name = "mdurl" }, 119 | ] 120 | sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } 121 | wheels = [ 122 | { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, 123 | ] 124 | 125 | [[package]] 126 | name = "mcp" 127 | version = "1.6.0" 128 | source = { registry = "https://pypi.org/simple" } 129 | dependencies = [ 130 | { name = "anyio" }, 131 | { name = "httpx" }, 132 | { name = "httpx-sse" }, 133 | { name = "pydantic" }, 134 | { name = "pydantic-settings" }, 135 | { name = "sse-starlette" }, 136 | { name = "starlette" }, 137 | { name = "uvicorn" }, 138 | ] 139 | sdist = { url = "https://files.pythonhosted.org/packages/95/d2/f587cb965a56e992634bebc8611c5b579af912b74e04eb9164bd49527d21/mcp-1.6.0.tar.gz", hash = "sha256:d9324876de2c5637369f43161cd71eebfd803df5a95e46225cab8d280e366723", size = 200031 } 140 | wheels = [ 141 | { url = "https://files.pythonhosted.org/packages/10/30/20a7f33b0b884a9d14dd3aa94ff1ac9da1479fe2ad66dd9e2736075d2506/mcp-1.6.0-py3-none-any.whl", hash = "sha256:7bd24c6ea042dbec44c754f100984d186620d8b841ec30f1b19eda9b93a634d0", size = 76077 }, 142 | ] 143 | 144 | [package.optional-dependencies] 145 | cli = [ 146 | { name = "python-dotenv" }, 147 | { name = "typer" }, 148 | ] 149 | 150 | [[package]] 151 | name = "mdurl" 152 | version = "0.1.2" 153 | source = { registry = "https://pypi.org/simple" } 154 | sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } 155 | wheels = [ 156 | { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, 157 | ] 158 | 159 | [[package]] 160 | name = "pixverse-mcp" 161 | version = "0.1.21" 162 | source = { virtual = "." } 163 | dependencies = [ 164 | { name = "httpx" }, 165 | { name = "mcp", extra = ["cli"] }, 166 | { name = "python-dotenv" }, 167 | ] 168 | 169 | [package.metadata] 170 | requires-dist = [ 171 | { name = "httpx", specifier = ">=0.24.0" }, 172 | { name = "mcp", extras = ["cli"], specifier = ">=1.6.0" }, 173 | { name = "python-dotenv", specifier = ">=1.0.0" }, 174 | ] 175 | 176 | [[package]] 177 | name = "pydantic" 178 | version = "2.11.3" 179 | source = { registry = "https://pypi.org/simple" } 180 | dependencies = [ 181 | { name = "annotated-types" }, 182 | { name = "pydantic-core" }, 183 | { name = "typing-extensions" }, 184 | { name = "typing-inspection" }, 185 | ] 186 | sdist = { url = "https://files.pythonhosted.org/packages/10/2e/ca897f093ee6c5f3b0bee123ee4465c50e75431c3d5b6a3b44a47134e891/pydantic-2.11.3.tar.gz", hash = "sha256:7471657138c16adad9322fe3070c0116dd6c3ad8d649300e3cbdfe91f4db4ec3", size = 785513 } 187 | wheels = [ 188 | { url = "https://files.pythonhosted.org/packages/b0/1d/407b29780a289868ed696d1616f4aad49d6388e5a77f567dcd2629dcd7b8/pydantic-2.11.3-py3-none-any.whl", hash = "sha256:a082753436a07f9ba1289c6ffa01cd93db3548776088aa917cc43b63f68fa60f", size = 443591 }, 189 | ] 190 | 191 | [[package]] 192 | name = "pydantic-core" 193 | version = "2.33.1" 194 | source = { registry = "https://pypi.org/simple" } 195 | dependencies = [ 196 | { name = "typing-extensions" }, 197 | ] 198 | sdist = { url = "https://files.pythonhosted.org/packages/17/19/ed6a078a5287aea7922de6841ef4c06157931622c89c2a47940837b5eecd/pydantic_core-2.33.1.tar.gz", hash = "sha256:bcc9c6fdb0ced789245b02b7d6603e17d1563064ddcfc36f046b61c0c05dd9df", size = 434395 } 199 | wheels = [ 200 | { url = "https://files.pythonhosted.org/packages/c8/ce/3cb22b07c29938f97ff5f5bb27521f95e2ebec399b882392deb68d6c440e/pydantic_core-2.33.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1293d7febb995e9d3ec3ea09caf1a26214eec45b0f29f6074abb004723fc1de8", size = 2026640 }, 201 | { url = "https://files.pythonhosted.org/packages/19/78/f381d643b12378fee782a72126ec5d793081ef03791c28a0fd542a5bee64/pydantic_core-2.33.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:99b56acd433386c8f20be5c4000786d1e7ca0523c8eefc995d14d79c7a081498", size = 1852649 }, 202 | { url = "https://files.pythonhosted.org/packages/9d/2b/98a37b80b15aac9eb2c6cfc6dbd35e5058a352891c5cce3a8472d77665a6/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35a5ec3fa8c2fe6c53e1b2ccc2454398f95d5393ab398478f53e1afbbeb4d939", size = 1892472 }, 203 | { url = "https://files.pythonhosted.org/packages/4e/d4/3c59514e0f55a161004792b9ff3039da52448f43f5834f905abef9db6e4a/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b172f7b9d2f3abc0efd12e3386f7e48b576ef309544ac3a63e5e9cdd2e24585d", size = 1977509 }, 204 | { url = "https://files.pythonhosted.org/packages/a9/b6/c2c7946ef70576f79a25db59a576bce088bdc5952d1b93c9789b091df716/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9097b9f17f91eea659b9ec58148c0747ec354a42f7389b9d50701610d86f812e", size = 2128702 }, 205 | { url = "https://files.pythonhosted.org/packages/88/fe/65a880f81e3f2a974312b61f82a03d85528f89a010ce21ad92f109d94deb/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc77ec5b7e2118b152b0d886c7514a4653bcb58c6b1d760134a9fab915f777b3", size = 2679428 }, 206 | { url = "https://files.pythonhosted.org/packages/6f/ff/4459e4146afd0462fb483bb98aa2436d69c484737feaceba1341615fb0ac/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3d15245b08fa4a84cefc6c9222e6f37c98111c8679fbd94aa145f9a0ae23d", size = 2008753 }, 207 | { url = "https://files.pythonhosted.org/packages/7c/76/1c42e384e8d78452ededac8b583fe2550c84abfef83a0552e0e7478ccbc3/pydantic_core-2.33.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef99779001d7ac2e2461d8ab55d3373fe7315caefdbecd8ced75304ae5a6fc6b", size = 2114849 }, 208 | { url = "https://files.pythonhosted.org/packages/00/72/7d0cf05095c15f7ffe0eb78914b166d591c0eed72f294da68378da205101/pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fc6bf8869e193855e8d91d91f6bf59699a5cdfaa47a404e278e776dd7f168b39", size = 2069541 }, 209 | { url = "https://files.pythonhosted.org/packages/b3/69/94a514066bb7d8be499aa764926937409d2389c09be0b5107a970286ef81/pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:b1caa0bc2741b043db7823843e1bde8aaa58a55a58fda06083b0569f8b45693a", size = 2239225 }, 210 | { url = "https://files.pythonhosted.org/packages/84/b0/e390071eadb44b41f4f54c3cef64d8bf5f9612c92686c9299eaa09e267e2/pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ec259f62538e8bf364903a7d0d0239447059f9434b284f5536e8402b7dd198db", size = 2248373 }, 211 | { url = "https://files.pythonhosted.org/packages/d6/b2/288b3579ffc07e92af66e2f1a11be3b056fe1214aab314748461f21a31c3/pydantic_core-2.33.1-cp312-cp312-win32.whl", hash = "sha256:e14f369c98a7c15772b9da98987f58e2b509a93235582838bd0d1d8c08b68fda", size = 1907034 }, 212 | { url = "https://files.pythonhosted.org/packages/02/28/58442ad1c22b5b6742b992ba9518420235adced665513868f99a1c2638a5/pydantic_core-2.33.1-cp312-cp312-win_amd64.whl", hash = "sha256:1c607801d85e2e123357b3893f82c97a42856192997b95b4d8325deb1cd0c5f4", size = 1956848 }, 213 | { url = "https://files.pythonhosted.org/packages/a1/eb/f54809b51c7e2a1d9f439f158b8dd94359321abcc98767e16fc48ae5a77e/pydantic_core-2.33.1-cp312-cp312-win_arm64.whl", hash = "sha256:8d13f0276806ee722e70a1c93da19748594f19ac4299c7e41237fc791d1861ea", size = 1903986 }, 214 | { url = "https://files.pythonhosted.org/packages/7a/24/eed3466a4308d79155f1cdd5c7432c80ddcc4530ba8623b79d5ced021641/pydantic_core-2.33.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:70af6a21237b53d1fe7b9325b20e65cbf2f0a848cf77bed492b029139701e66a", size = 2033551 }, 215 | { url = "https://files.pythonhosted.org/packages/ab/14/df54b1a0bc9b6ded9b758b73139d2c11b4e8eb43e8ab9c5847c0a2913ada/pydantic_core-2.33.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:282b3fe1bbbe5ae35224a0dbd05aed9ccabccd241e8e6b60370484234b456266", size = 1852785 }, 216 | { url = "https://files.pythonhosted.org/packages/fa/96/e275f15ff3d34bb04b0125d9bc8848bf69f25d784d92a63676112451bfb9/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b315e596282bbb5822d0c7ee9d255595bd7506d1cb20c2911a4da0b970187d3", size = 1897758 }, 217 | { url = "https://files.pythonhosted.org/packages/b7/d8/96bc536e975b69e3a924b507d2a19aedbf50b24e08c80fb00e35f9baaed8/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1dfae24cf9921875ca0ca6a8ecb4bb2f13c855794ed0d468d6abbec6e6dcd44a", size = 1986109 }, 218 | { url = "https://files.pythonhosted.org/packages/90/72/ab58e43ce7e900b88cb571ed057b2fcd0e95b708a2e0bed475b10130393e/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6dd8ecfde08d8bfadaea669e83c63939af76f4cf5538a72597016edfa3fad516", size = 2129159 }, 219 | { url = "https://files.pythonhosted.org/packages/dc/3f/52d85781406886c6870ac995ec0ba7ccc028b530b0798c9080531b409fdb/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f593494876eae852dc98c43c6f260f45abdbfeec9e4324e31a481d948214764", size = 2680222 }, 220 | { url = "https://files.pythonhosted.org/packages/f4/56/6e2ef42f363a0eec0fd92f74a91e0ac48cd2e49b695aac1509ad81eee86a/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:948b73114f47fd7016088e5186d13faf5e1b2fe83f5e320e371f035557fd264d", size = 2006980 }, 221 | { url = "https://files.pythonhosted.org/packages/4c/c0/604536c4379cc78359f9ee0aa319f4aedf6b652ec2854953f5a14fc38c5a/pydantic_core-2.33.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e11f3864eb516af21b01e25fac915a82e9ddad3bb0fb9e95a246067398b435a4", size = 2120840 }, 222 | { url = "https://files.pythonhosted.org/packages/1f/46/9eb764814f508f0edfb291a0f75d10854d78113fa13900ce13729aaec3ae/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:549150be302428b56fdad0c23c2741dcdb5572413776826c965619a25d9c6bde", size = 2072518 }, 223 | { url = "https://files.pythonhosted.org/packages/42/e3/fb6b2a732b82d1666fa6bf53e3627867ea3131c5f39f98ce92141e3e3dc1/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:495bc156026efafd9ef2d82372bd38afce78ddd82bf28ef5276c469e57c0c83e", size = 2248025 }, 224 | { url = "https://files.pythonhosted.org/packages/5c/9d/fbe8fe9d1aa4dac88723f10a921bc7418bd3378a567cb5e21193a3c48b43/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ec79de2a8680b1a67a07490bddf9636d5c2fab609ba8c57597e855fa5fa4dacd", size = 2254991 }, 225 | { url = "https://files.pythonhosted.org/packages/aa/99/07e2237b8a66438d9b26482332cda99a9acccb58d284af7bc7c946a42fd3/pydantic_core-2.33.1-cp313-cp313-win32.whl", hash = "sha256:ee12a7be1742f81b8a65b36c6921022301d466b82d80315d215c4c691724986f", size = 1915262 }, 226 | { url = "https://files.pythonhosted.org/packages/8a/f4/e457a7849beeed1e5defbcf5051c6f7b3c91a0624dd31543a64fc9adcf52/pydantic_core-2.33.1-cp313-cp313-win_amd64.whl", hash = "sha256:ede9b407e39949d2afc46385ce6bd6e11588660c26f80576c11c958e6647bc40", size = 1956626 }, 227 | { url = "https://files.pythonhosted.org/packages/20/d0/e8d567a7cff7b04e017ae164d98011f1e1894269fe8e90ea187a3cbfb562/pydantic_core-2.33.1-cp313-cp313-win_arm64.whl", hash = "sha256:aa687a23d4b7871a00e03ca96a09cad0f28f443690d300500603bd0adba4b523", size = 1909590 }, 228 | { url = "https://files.pythonhosted.org/packages/ef/fd/24ea4302d7a527d672c5be06e17df16aabfb4e9fdc6e0b345c21580f3d2a/pydantic_core-2.33.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:401d7b76e1000d0dd5538e6381d28febdcacb097c8d340dde7d7fc6e13e9f95d", size = 1812963 }, 229 | { url = "https://files.pythonhosted.org/packages/5f/95/4fbc2ecdeb5c1c53f1175a32d870250194eb2fdf6291b795ab08c8646d5d/pydantic_core-2.33.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aeb055a42d734c0255c9e489ac67e75397d59c6fbe60d155851e9782f276a9c", size = 1986896 }, 230 | { url = "https://files.pythonhosted.org/packages/71/ae/fe31e7f4a62431222d8f65a3bd02e3fa7e6026d154a00818e6d30520ea77/pydantic_core-2.33.1-cp313-cp313t-win_amd64.whl", hash = "sha256:338ea9b73e6e109f15ab439e62cb3b78aa752c7fd9536794112e14bee02c8d18", size = 1931810 }, 231 | ] 232 | 233 | [[package]] 234 | name = "pydantic-settings" 235 | version = "2.8.1" 236 | source = { registry = "https://pypi.org/simple" } 237 | dependencies = [ 238 | { name = "pydantic" }, 239 | { name = "python-dotenv" }, 240 | ] 241 | sdist = { url = "https://files.pythonhosted.org/packages/88/82/c79424d7d8c29b994fb01d277da57b0a9b09cc03c3ff875f9bd8a86b2145/pydantic_settings-2.8.1.tar.gz", hash = "sha256:d5c663dfbe9db9d5e1c646b2e161da12f0d734d422ee56f567d0ea2cee4e8585", size = 83550 } 242 | wheels = [ 243 | { url = "https://files.pythonhosted.org/packages/0b/53/a64f03044927dc47aafe029c42a5b7aabc38dfb813475e0e1bf71c4a59d0/pydantic_settings-2.8.1-py3-none-any.whl", hash = "sha256:81942d5ac3d905f7f3ee1a70df5dfb62d5569c12f51a5a647defc1c3d9ee2e9c", size = 30839 }, 244 | ] 245 | 246 | [[package]] 247 | name = "pygments" 248 | version = "2.19.1" 249 | source = { registry = "https://pypi.org/simple" } 250 | sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } 251 | wheels = [ 252 | { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, 253 | ] 254 | 255 | [[package]] 256 | name = "python-dotenv" 257 | version = "1.1.0" 258 | source = { registry = "https://pypi.org/simple" } 259 | sdist = { url = "https://files.pythonhosted.org/packages/88/2c/7bb1416c5620485aa793f2de31d3df393d3686aa8a8506d11e10e13c5baf/python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5", size = 39920 } 260 | wheels = [ 261 | { url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256 }, 262 | ] 263 | 264 | [[package]] 265 | name = "rich" 266 | version = "14.0.0" 267 | source = { registry = "https://pypi.org/simple" } 268 | dependencies = [ 269 | { name = "markdown-it-py" }, 270 | { name = "pygments" }, 271 | ] 272 | sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078 } 273 | wheels = [ 274 | { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, 275 | ] 276 | 277 | [[package]] 278 | name = "shellingham" 279 | version = "1.5.4" 280 | source = { registry = "https://pypi.org/simple" } 281 | sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310 } 282 | wheels = [ 283 | { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755 }, 284 | ] 285 | 286 | [[package]] 287 | name = "sniffio" 288 | version = "1.3.1" 289 | source = { registry = "https://pypi.org/simple" } 290 | sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } 291 | wheels = [ 292 | { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, 293 | ] 294 | 295 | [[package]] 296 | name = "sse-starlette" 297 | version = "2.2.1" 298 | source = { registry = "https://pypi.org/simple" } 299 | dependencies = [ 300 | { name = "anyio" }, 301 | { name = "starlette" }, 302 | ] 303 | sdist = { url = "https://files.pythonhosted.org/packages/71/a4/80d2a11af59fe75b48230846989e93979c892d3a20016b42bb44edb9e398/sse_starlette-2.2.1.tar.gz", hash = "sha256:54470d5f19274aeed6b2d473430b08b4b379ea851d953b11d7f1c4a2c118b419", size = 17376 } 304 | wheels = [ 305 | { url = "https://files.pythonhosted.org/packages/d9/e0/5b8bd393f27f4a62461c5cf2479c75a2cc2ffa330976f9f00f5f6e4f50eb/sse_starlette-2.2.1-py3-none-any.whl", hash = "sha256:6410a3d3ba0c89e7675d4c273a301d64649c03a5ef1ca101f10b47f895fd0e99", size = 10120 }, 306 | ] 307 | 308 | [[package]] 309 | name = "starlette" 310 | version = "0.46.2" 311 | source = { registry = "https://pypi.org/simple" } 312 | dependencies = [ 313 | { name = "anyio" }, 314 | ] 315 | sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846 } 316 | wheels = [ 317 | { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037 }, 318 | ] 319 | 320 | [[package]] 321 | name = "typer" 322 | version = "0.15.2" 323 | source = { registry = "https://pypi.org/simple" } 324 | dependencies = [ 325 | { name = "click" }, 326 | { name = "rich" }, 327 | { name = "shellingham" }, 328 | { name = "typing-extensions" }, 329 | ] 330 | sdist = { url = "https://files.pythonhosted.org/packages/8b/6f/3991f0f1c7fcb2df31aef28e0594d8d54b05393a0e4e34c65e475c2a5d41/typer-0.15.2.tar.gz", hash = "sha256:ab2fab47533a813c49fe1f16b1a370fd5819099c00b119e0633df65f22144ba5", size = 100711 } 331 | wheels = [ 332 | { url = "https://files.pythonhosted.org/packages/7f/fc/5b29fea8cee020515ca82cc68e3b8e1e34bb19a3535ad854cac9257b414c/typer-0.15.2-py3-none-any.whl", hash = "sha256:46a499c6107d645a9c13f7ee46c5d5096cae6f5fc57dd11eccbbb9ae3e44ddfc", size = 45061 }, 333 | ] 334 | 335 | [[package]] 336 | name = "typing-extensions" 337 | version = "4.13.2" 338 | source = { registry = "https://pypi.org/simple" } 339 | sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 } 340 | wheels = [ 341 | { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 }, 342 | ] 343 | 344 | [[package]] 345 | name = "typing-inspection" 346 | version = "0.4.0" 347 | source = { registry = "https://pypi.org/simple" } 348 | dependencies = [ 349 | { name = "typing-extensions" }, 350 | ] 351 | sdist = { url = "https://files.pythonhosted.org/packages/82/5c/e6082df02e215b846b4b8c0b887a64d7d08ffaba30605502639d44c06b82/typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122", size = 76222 } 352 | wheels = [ 353 | { url = "https://files.pythonhosted.org/packages/31/08/aa4fdfb71f7de5176385bd9e90852eaf6b5d622735020ad600f2bab54385/typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f", size = 14125 }, 354 | ] 355 | 356 | [[package]] 357 | name = "uvicorn" 358 | version = "0.34.1" 359 | source = { registry = "https://pypi.org/simple" } 360 | dependencies = [ 361 | { name = "click" }, 362 | { name = "h11" }, 363 | ] 364 | sdist = { url = "https://files.pythonhosted.org/packages/86/37/dd92f1f9cedb5eaf74d9999044306e06abe65344ff197864175dbbd91871/uvicorn-0.34.1.tar.gz", hash = "sha256:af981725fc4b7ffc5cb3b0e9eda6258a90c4b52cb2a83ce567ae0a7ae1757afc", size = 76755 } 365 | wheels = [ 366 | { url = "https://files.pythonhosted.org/packages/5f/38/a5801450940a858c102a7ad9e6150146a25406a119851c993148d56ab041/uvicorn-0.34.1-py3-none-any.whl", hash = "sha256:984c3a8c7ca18ebaad15995ee7401179212c59521e67bfc390c07fa2b8d2e065", size = 62404 }, 367 | ] 368 | -------------------------------------------------------------------------------- /src/server.py: -------------------------------------------------------------------------------- 1 | """ 2 | MCP server implementation for Pixverse video generation. 3 | """ 4 | 5 | import asyncio 6 | import json 7 | import os 8 | from typing import Any, Dict, List, Optional, Sequence 9 | 10 | from loguru import logger 11 | from mcp.server import Server 12 | from mcp.server.models import InitializationOptions 13 | from mcp.server import NotificationOptions 14 | from mcp.server.stdio import stdio_server 15 | from mcp.types import ( 16 | CallToolRequest, 17 | CallToolResult, 18 | EmbeddedResource, 19 | ImageContent, 20 | ListToolsRequest, 21 | ListToolsResult, 22 | TextContent, 23 | Tool, 24 | ContentBlock, 25 | ) 26 | from pydantic import ValidationError 27 | 28 | from .client import PixverseClient 29 | from .exceptions import PixverseError 30 | from .config import get_config, PixverseConfig 31 | from .models.requests import ( 32 | ExtendVideoRequest, 33 | FusionVideoRequest, 34 | ImageToVideoRequest, 35 | LipSyncVideoRequest, 36 | SoundEffectVideoRequest, 37 | TextToVideoRequest, 38 | TransitionVideoRequest, 39 | ) 40 | 41 | 42 | class PixverseMCPServer: 43 | """MCP Server for Pixverse video generation APIs.""" 44 | 45 | def __init__(self, config_path: Optional[str] = None): 46 | self.config_path = config_path 47 | self.config: Optional[PixverseConfig] = None 48 | self.server = Server("pixverse-mcp") 49 | self.client: Optional[PixverseClient] = None 50 | self._setup_handlers() 51 | 52 | def _setup_handlers(self): 53 | """Setup MCP server handlers.""" 54 | 55 | @self.server.list_tools() 56 | async def handle_list_tools() -> List[Tool]: 57 | """List available tools.""" 58 | return [ 59 | Tool( 60 | name="capabilities_overview", 61 | description="""System Capabilities Overview - Read before planning video production workflow. 62 | 63 | CORE CONSTRAINTS: 64 | • Single generation: Maximum 5s or 8s per call 65 | • For videos >8s: Must chain multiple generations using extend_video or compose multiple segments 66 | • Status checking: Poll get_video_status every 6 seconds until completion 67 | • Generation time: Typically 60-120 seconds per segment 68 | • Concurrency limits: Each account has its own concurrent generation limit 69 | - Multiple independent videos CAN be generated in parallel (e.g., different calls) 70 | - Test your account's concurrent capacity to optimize throughput 71 | 72 | AVAILABLE CAPABILITIES: 73 | 74 | 1. TEXT → VIDEO (text_to_video) 75 | - Generate video from text description alone 76 | - No reference images needed 77 | - Full creative control through prompts 78 | - Supports styles, camera movements, sound effects 79 | 80 | 2. IMAGE → VIDEO (image_to_video) 81 | - Animate static images 82 | - Requires upload_image first to get img_id 83 | - Better visual consistency with reference 84 | - Supports single or multiple images with templates 85 | 86 | 3. VIDEO EXTENSION (extend_video) 87 | - Continue existing video seamlessly 88 | - Key tool for creating longer sequences 89 | - Maintains visual continuity from source 90 | - Can be chained infinitely for any length 91 | 92 | 4. SCENE TRANSITIONS (transition_video) 93 | - Smooth morphing between two images 94 | - Requires two img_ids (first_frame, last_frame) 95 | - Creates bridging animation 96 | - Perfect for multi-scene storytelling 97 | 98 | 5. LIP SYNC (lip_sync_video) 99 | - Add realistic lip sync to talking head videos 100 | - Supports uploaded audio or TTS (text-to-speech) 101 | - Automatically matches mouth movements to audio 102 | - Multiple speaker voices available via get_tts_speakers 103 | 104 | 6. SOUND EFFECTS (sound_effect_video) 105 | - Add AI-generated sound effects to any video 106 | - Contextual sounds based on text description 107 | - Can preserve or replace original audio 108 | - Ambient sounds, foley effects, music, etc. 109 | 110 | 7. FUSION VIDEO (fusion_video) 111 | - Composite multiple subjects into one scene (v4.5 only) 112 | - Reference subjects using @ref_name syntax 113 | - Combine 1-3 image references (subjects/backgrounds) 114 | - Create impossible combinations naturally 115 | 116 | 8. RESOURCE UPLOADS (upload_image, upload_video) 117 | - Upload local files or from URLs 118 | - Images: jpg, jpeg, png, webp 119 | - Videos: mp4, mov 120 | - Returns resource IDs for other operations 121 | 122 | 9. TTS SPEAKERS (get_tts_speakers) 123 | - List available text-to-speech voice options 124 | - Get speaker_id for use in lip_sync_video 125 | - Multiple languages and accents 126 | - Pagination support for browsing voices 127 | 128 | 10. STATUS CHECK (get_video_status) 129 | - Monitor video generation progress 130 | - Retrieve completed video URL 131 | - Check for errors or failures 132 | - Poll every 6 seconds until ready 133 | 134 | WHAT YOU CAN BUILD: 135 | • Single-scene videos (5-8s): Direct generation 136 | • Extended sequences (any length): Chain segments with extend_video 137 | • Multi-scene stories: Combine different generations with or without transitions 138 | • Image animations: Upload + animate + extend 139 | • Hybrid content: Mix text-gen and image-gen segments 140 | • Smooth narratives: Use transitions between scene changes 141 | • Talking character videos: Generate/upload video + lip sync with audio/TTS 142 | • Enhanced videos: Add sound effects, ambient sounds, music 143 | • Composite scenes: Fusion video with multiple subjects and backgrounds 144 | 145 | SYSTEM BEHAVIOR: 146 | • Polling required: Videos don't generate instantly, must check status 147 | • Async nature: Submit job → get video_id → poll until ready 148 | • Modular design: Each tool does one thing, combine them for complex results 149 | • Resource management: Upload assets first, then reference by ID 150 | • Parallel generation: Submit multiple independent videos simultaneously to maximize throughput 151 | - Good: Generate 3 different text_to_video scenes at once 152 | - Bad: Try to extend_video before previous segment completes 153 | - Strategy: For multi-scene projects, generate all initial scenes in parallel, then extend each 154 | 155 | Your task: Analyze user requirements, understand constraints, and design an appropriate workflow using these capabilities.""", 156 | inputSchema={ 157 | "type": "object", 158 | "properties": {}, 159 | "description": "Informational overview, not a callable tool" 160 | }, 161 | ), 162 | Tool( 163 | name="text_to_video", 164 | description="""Generate video from text prompt. 165 | 166 | CAPABILITIES: 167 | • Creates video from text description alone 168 | • Supports various styles (anime, 3d_animation, clay, comic, cyberpunk) 169 | • Camera movement controls available (zoom, pan, rotation, etc.) - v4/v4.5 only 170 | • Duration: 5s or 8s per generation 171 | • Quality: 360p to 1080p 172 | 173 | WHEN TO USE: 174 | • No reference images available 175 | • Abstract concepts or imagined scenes 176 | • Starting point for longer sequences (combine with extend_video) 177 | • Creative scenarios requiring full prompt control 178 | 179 | LIMITATIONS: 180 | • Cannot exceed 8s in single call 181 | • For longer videos: chain with extend_video or create separate segments""", 182 | inputSchema={ 183 | "type": "object", 184 | "properties": { 185 | "prompt": { 186 | "type": "string", 187 | "description": "Text prompt for video generation (max 2048 chars)", 188 | "maxLength": 2048, 189 | }, 190 | "model": { 191 | "type": "string", 192 | "enum": ["v4.5", "v5"], 193 | "default": "v5", 194 | "description": "Model version to use", 195 | }, 196 | "duration": { 197 | "type": "integer", 198 | "enum": [5, 8], 199 | "default": 5, 200 | "description": "Video duration in seconds", 201 | }, 202 | "aspect_ratio": { 203 | "type": "string", 204 | "enum": ["16:9", "4:3", "1:1", "3:4", "9:16"], 205 | "default": "16:9", 206 | "description": "Video aspect ratio", 207 | }, 208 | "quality": { 209 | "type": "string", 210 | "enum": ["360p", "540p", "720p", "1080p"], 211 | "default": "540p", 212 | "description": "Video quality", 213 | }, 214 | "style": { 215 | "type": "string", 216 | "enum": ["anime", "3d_animation", "clay", "comic", "cyberpunk"], 217 | "description": "Video style (optional)", 218 | }, 219 | "negative_prompt": { 220 | "type": "string", 221 | "description": "Negative prompt (optional)", 222 | "maxLength": 2048, 223 | }, 224 | "motion_mode": { 225 | "type": "string", 226 | "enum": ["normal", "fast"], 227 | "default": "normal", 228 | "description": "Motion mode", 229 | }, 230 | "camera_movement": { 231 | "type": "string", 232 | "enum": [ 233 | "horizontal_right", 234 | "horizontal_left", 235 | "zoom_in", 236 | "zoom_out", 237 | "vertical_up", 238 | "vertical_down", 239 | "crane_up", 240 | "quickly_zoom_in", 241 | "quickly_zoom_out", 242 | "smooth_zoom_in", 243 | "camera_rotation", 244 | "robo_arm", 245 | "super_dolly_out", 246 | "whip_pan", 247 | "hitchcock", 248 | "left_follow", 249 | "right_follow", 250 | "pan_left", 251 | "pan_right", 252 | "fix_bg", 253 | "default", 254 | ], 255 | "description": "Camera movement (optional, cannot use with template_id)", 256 | }, 257 | "template_id": { 258 | "type": "integer", 259 | "description": "Template ID (optional, cannot use with camera_movement)", 260 | }, 261 | "seed": {"type": "integer", "description": "Random seed (optional)"}, 262 | "sound_effect_switch": { 263 | "type": "boolean", 264 | "default": False, 265 | "description": "Enable sound effects", 266 | }, 267 | "sound_effect_content": { 268 | "type": "string", 269 | "description": "Sound effect description (optional)", 270 | "maxLength": 2048, 271 | }, 272 | }, 273 | "required": ["prompt"], 274 | }, 275 | ), 276 | Tool( 277 | name="image_to_video", 278 | description="""Animate static images into video. 279 | 280 | CAPABILITIES: 281 | • Brings still images to life with motion 282 | • Better visual consistency than text_to_video (uses image as reference) 283 | • Supports single image or multiple images (with templates) 284 | • Duration: 5s or 8s per generation 285 | • Prompt guides animation style and motion 286 | 287 | REQUIREMENTS: 288 | • Must call upload_image first to obtain img_id 289 | • img_id required as input parameter 290 | 291 | WHEN TO USE: 292 | • Have photos, artwork, or visual references 293 | • Want consistent visual style based on reference 294 | • Creating photo animations or slideshow-style videos 295 | • Need visual control beyond text descriptions 296 | 297 | EXTENSIBILITY: 298 | • Can extend with extend_video for longer sequences 299 | • Can connect multiple images with transition_video""", 300 | inputSchema={ 301 | "type": "object", 302 | "properties": { 303 | "prompt": { 304 | "type": "string", 305 | "description": "Text prompt for video generation (max 2048 chars)", 306 | "maxLength": 2048, 307 | }, 308 | "img_id": {"type": "integer", "description": "Image ID for single image"}, 309 | "img_ids": { 310 | "type": "array", 311 | "items": {"type": "integer"}, 312 | "description": "Array of image IDs for multiple images only supports with multi-templates", 313 | }, 314 | "model": { 315 | "type": "string", 316 | "enum": ["v4.5", "v5"], 317 | "default": "v5", 318 | "description": "Model version to use", 319 | }, 320 | "duration": { 321 | "type": "integer", 322 | "enum": [5, 8], 323 | "default": 5, 324 | "description": "Video duration in seconds", 325 | }, 326 | "quality": { 327 | "type": "string", 328 | "enum": ["360p", "540p", "720p", "1080p"], 329 | "default": "540p", 330 | "description": "Video quality", 331 | }, 332 | "style": { 333 | "type": "string", 334 | "enum": ["anime", "3d_animation", "clay", "comic", "cyberpunk"], 335 | "description": "Video style (optional)", 336 | }, 337 | "template_id": {"type": "integer", "description": "Template ID (optional)ß"}, 338 | "motion_mode": { 339 | "type": "string", 340 | "enum": ["normal", "fast"], 341 | "default": "normal", 342 | "description": "Motion mode", 343 | }, 344 | "sound_effect_switch": { 345 | "type": "boolean", 346 | "default": False, 347 | "description": "Enable sound effects", 348 | }, 349 | }, 350 | "required": ["prompt"], 351 | }, 352 | ), 353 | Tool( 354 | name="transition_video", 355 | description="""Create smooth transition between two images. 356 | 357 | CAPABILITIES: 358 | • Generates morphing animation from first image to last image 359 | • Creates visual continuity between different scenes 360 | • Duration: 5s or 8s 361 | • Prompt guides transition style 362 | 363 | REQUIREMENTS: 364 | • Two img_ids needed: first_frame_img and last_frame_img 365 | • Both images must be uploaded via upload_image first 366 | 367 | WHEN TO USE: 368 | • Connecting different scenes or compositions 369 | • Multi-scene narratives requiring smooth visual flow 370 | • Photo montages with elegant transitions 371 | • Scene changes where visual continuity matters 372 | 373 | CREATIVE POTENTIAL: 374 | • Day-to-night transitions 375 | • Character transformations 376 | • Location changes 377 | • Abstract visual morphing""", 378 | inputSchema={ 379 | "type": "object", 380 | "properties": { 381 | "prompt": { 382 | "type": "string", 383 | "description": "Text prompt for video generation", 384 | "maxLength": 2048, 385 | }, 386 | "first_frame_img": {"type": "integer", "description": "First frame image ID"}, 387 | "last_frame_img": {"type": "integer", "description": "Last frame image ID"}, 388 | "model": { 389 | "type": "string", 390 | "enum": ["v4.5", "v5"], 391 | "default": "v5", 392 | "description": "Model version to use", 393 | }, 394 | "duration": { 395 | "type": "integer", 396 | "enum": [5, 8], 397 | "default": 5, 398 | "description": "Video duration in seconds", 399 | }, 400 | "quality": { 401 | "type": "string", 402 | "enum": ["360p", "540p", "720p", "1080p"], 403 | "default": "540p", 404 | "description": "Video quality", 405 | }, 406 | }, 407 | "required": ["prompt", "first_frame_img", "last_frame_img"], 408 | }, 409 | ), 410 | Tool( 411 | name="extend_video", 412 | description="""Continue an existing video with additional footage. 413 | 414 | CAPABILITIES: 415 | • Extends any generated video by 5s or 8s 416 | • Maintains visual continuity from source video 417 | • Can be chained multiple times for arbitrary length 418 | • Prompt can remain consistent or evolve for story progression 419 | 420 | REQUIREMENTS: 421 | • source_video_id from a previously generated video 422 | 423 | WHEN TO USE: 424 | • Any video requirement exceeding 8s duration 425 | • Creating longer narratives or sequences 426 | • Continuing visual story from initial generation 427 | 428 | KEY INSIGHT: 429 | This is the primary method for creating videos longer than the 8s single-generation limit. Any multi-second video requirement will likely need this tool. 430 | 431 | FLEXIBILITY: 432 | • Keep same prompt = smooth continuation 433 | • Evolve prompt = gradual scene progression 434 | • No strict limit on chain length (though quality may degrade after many extensions)""", 435 | inputSchema={ 436 | "type": "object", 437 | "properties": { 438 | "prompt": { 439 | "type": "string", 440 | "description": "Text prompt for video extension", 441 | "maxLength": 2048, 442 | }, 443 | "source_video_id": {"type": "integer", "description": "Source video ID (generated video)"}, 444 | "video_media_id": {"type": "integer", "description": "Video media ID (uploaded video)"}, 445 | "model": { 446 | "type": "string", 447 | "enum": ["v4.5", "v5"], 448 | "default": "v5", 449 | "description": "Model version", 450 | }, 451 | "duration": { 452 | "type": "integer", 453 | "enum": [5, 8], 454 | "default": 5, 455 | "description": "Video duration in seconds", 456 | }, 457 | "quality": { 458 | "type": "string", 459 | "enum": ["360p", "540p", "720p", "1080p"], 460 | "default": "540p", 461 | "description": "Video quality", 462 | }, 463 | }, 464 | "required": ["prompt"], 465 | }, 466 | ), 467 | Tool( 468 | name="lip_sync_video", 469 | description="""Generate lip sync video with synchronized mouth movements. 470 | 471 | CAPABILITIES: 472 | • Adds realistic lip sync to talking head videos 473 | • Supports both uploaded audio and TTS (text-to-speech) 474 | • Automatically matches mouth movements to audio timing 475 | • Works with both generated videos and uploaded videos 476 | 477 | INPUT OPTIONS: 478 | • Audio: Upload audio file (audio_media_id) OR use TTS (lip_sync_tts_content + speaker_id) 479 | • Video: Generated video (source_video_id) OR uploaded video (video_media_id) 480 | 481 | WHEN TO USE: 482 | • Creating talking character videos 483 | • Adding voiceovers to portrait videos 484 | • Dubbing existing videos with new dialogue 485 | • Character animations with speech 486 | 487 | TTS FEATURES: 488 | • Multiple speaker voices available (use get_tts_speakers to list) 489 | • Text limit: 200 characters per generation 490 | • Supports various languages and accents 491 | 492 | WORKFLOW: 493 | • With custom audio: upload_video + upload_audio → lip_sync_video 494 | • With TTS: generate/upload video + choose speaker → lip_sync_video with text""", 495 | inputSchema={ 496 | "type": "object", 497 | "properties": { 498 | "source_video_id": {"type": "integer", "description": "Source video ID (generated video)"}, 499 | "video_media_id": {"type": "integer", "description": "Video media ID (uploaded video)"}, 500 | "audio_media_id": {"type": "integer", "description": "Audio media ID (uploaded audio)"}, 501 | "lip_sync_tts_speaker_id": {"type": "string", "description": "TTS speaker ID"}, 502 | "lip_sync_tts_content": { 503 | "type": "string", 504 | "description": "TTS content (max 200 chars)", 505 | "maxLength": 200, 506 | }, 507 | }, 508 | "required": [], 509 | }, 510 | ), 511 | Tool( 512 | name="sound_effect_video", 513 | description="""Add AI-generated sound effects to video. 514 | 515 | CAPABILITIES: 516 | • Generates contextual sound effects based on text description 517 | • Can preserve or replace original video audio 518 | • AI analyzes video content and matches sound effects to visuals 519 | • Duration matches source video length 520 | 521 | INPUT: 522 | • Video: Generated video (source_video_id) OR uploaded video (video_media_id) 523 | • Description: Text describing desired sound effects (sound_effect_content) 524 | 525 | WHEN TO USE: 526 | • Adding ambient sounds (ocean waves, wind, rain, city noise) 527 | • Creating foley effects (footsteps, door creaks, object interactions) 528 | • Enhancing atmosphere (dramatic music, tension sounds) 529 | • Replacing/augmenting existing audio 530 | 531 | AUDIO CONTROL: 532 | • original_sound_switch=true: Mixes new effects with existing audio 533 | • original_sound_switch=false: Replaces audio entirely with new effects 534 | 535 | CREATIVE EXAMPLES: 536 | • Nature videos: "Gentle ocean waves, seagull calls, soft wind" 537 | • Urban scenes: "Busy city traffic, people chatting, car horns" 538 | • Dramatic moments: "Suspenseful music, thunder rumbling" 539 | • Fantasy: "Magical sparkles, mystical ambiance, ethereal tones\"""", 540 | inputSchema={ 541 | "type": "object", 542 | "properties": { 543 | "sound_effect_content": { 544 | "type": "string", 545 | "description": "Sound effect description", 546 | "maxLength": 2048, 547 | }, 548 | "source_video_id": {"type": "integer", "description": "Source video ID (generated video)"}, 549 | "video_media_id": {"type": "integer", "description": "Video media ID (uploaded video)"}, 550 | "original_sound_switch": { 551 | "type": "boolean", 552 | "default": False, 553 | "description": "Keep original sound", 554 | }, 555 | }, 556 | "required": ["sound_effect_content"], 557 | }, 558 | ), 559 | Tool( 560 | name="fusion_video", 561 | description="""Generate fusion video compositing multiple subjects into one scene (v4.5 only). 562 | 563 | CAPABILITIES: 564 | • Combines multiple subjects/backgrounds into a single coherent video 565 | • Reference subjects by name in prompt using @ref_name syntax 566 | • Supports 1-3 image references per video 567 | • Each reference can be a subject or background 568 | • AI composites them naturally into unified scene 569 | 570 | HOW IT WORKS: 571 | 1. Upload reference images (subjects/backgrounds) 572 | 2. Assign each a ref_name (e.g., "person", "cat", "beach") 573 | 3. In prompt, reference them: "@person walking on the @beach with a @cat" 574 | 4. AI generates video with all elements composed together 575 | 576 | REFERENCE TYPES: 577 | • subject: Main elements (characters, objects, specific items) 578 | • background: Environmental settings (locations, scenery) 579 | 580 | WHEN TO USE: 581 | • Placing specific characters in custom environments 582 | • Creating impossible combinations (person + fantasy background) 583 | • Product placement in various contexts 584 | • Character interactions that don't exist in reality 585 | 586 | LIMITATIONS: 587 | • v4.5 model only (not available in v5) 588 | • Maximum 3 image references per video 589 | • ref_name must be alphanumeric, max 30 characters 590 | 591 | EXAMPLE WORKFLOW: 592 | 1. upload_image(portrait.jpg) → img_id_1 (ref_name="hero", type="subject") 593 | 2. upload_image(castle.jpg) → img_id_2 (ref_name="castle", type="background") 594 | 3. fusion_video(prompt="@hero standing in front of @castle at sunset") 595 | Result: Portrait character composited into castle scene""", 596 | inputSchema={ 597 | "type": "object", 598 | "properties": { 599 | "prompt": { 600 | "type": "string", 601 | "description": "Text prompt with @ref_name references", 602 | "maxLength": 2048, 603 | }, 604 | "image_references": { 605 | "type": "array", 606 | "items": { 607 | "type": "object", 608 | "properties": { 609 | "type": {"type": "string", "enum": ["subject", "background"]}, 610 | "img_id": {"type": "integer"}, 611 | "ref_name": {"type": "string", "maxLength": 30}, 612 | }, 613 | "required": ["type", "img_id", "ref_name"], 614 | }, 615 | "minItems": 1, 616 | "maxItems": 3, 617 | }, 618 | "duration": {"type": "integer", "enum": [5, 8], "default": 5}, 619 | "quality": {"type": "string", "enum": ["360p", "540p", "720p", "1080p"], "default": "540p"}, 620 | "aspect_ratio": { 621 | "type": "string", 622 | "enum": ["16:9", "4:3", "1:1", "3:4", "9:16"], 623 | "default": "16:9", 624 | }, 625 | }, 626 | "required": ["prompt", "image_references"], 627 | }, 628 | ), 629 | Tool( 630 | name="upload_image", 631 | description="""Upload image file or from URL to Pixverse for video generation. 632 | 633 | CAPABILITIES: 634 | • Upload local image files from disk 635 | • Upload images directly from URLs 636 | • Supports: jpg, jpeg, png, webp formats 637 | • Returns img_id for use in other tools 638 | 639 | WHEN TO USE: 640 | • Before calling image_to_video (requires img_id) 641 | • Before calling transition_video (requires 2 img_ids) 642 | • Before calling fusion_video (requires 1-3 img_ids) 643 | 644 | INPUT OPTIONS: 645 | • file_path: Path to local image file 646 | • image_url: Direct URL to image 647 | (Provide ONE, not both) 648 | 649 | WORKFLOW: 650 | 1. upload_image → receive img_id in response 651 | 2. Use img_id in generation tools (image_to_video, transition_video, fusion_video) 652 | 653 | RETURNS: 654 | • img_id: Integer identifier for the uploaded image 655 | • img_url: Pixverse CDN URL where image is stored 656 | • File metadata (name, size, type)""", 657 | inputSchema={ 658 | "type": "object", 659 | "properties": { 660 | "file_path": { 661 | "type": "string", 662 | "description": "Path to the local image file to upload (supports jpg, jpeg, png, webp formats)", 663 | }, 664 | "image_url": { 665 | "type": "string", 666 | "description": "URL of the image to upload (alternative to file_path)", 667 | }, 668 | }, 669 | "required": [], 670 | }, 671 | ), 672 | Tool( 673 | name="upload_video", 674 | description="""Upload video file or from URL to Pixverse for video extension or other operations. 675 | 676 | CAPABILITIES: 677 | • Upload local video files from disk 678 | • Upload videos directly from URLs 679 | • Supports: mp4, mov formats 680 | • Returns video_media_id for use in other tools 681 | 682 | WHEN TO USE: 683 | • Before calling extend_video on existing footage (needs video_media_id) 684 | • Before calling lip_sync_video on external videos 685 | • Before calling sound_effect_video on external videos 686 | • When you want to extend/modify videos not generated by this system 687 | 688 | INPUT OPTIONS: 689 | • file_path: Path to local video file 690 | • file_url: Direct URL to video 691 | (Provide ONE, not both) 692 | 693 | WORKFLOW: 694 | 1. upload_video → receive video_media_id in response 695 | 2. Use video_media_id in processing tools (extend_video, lip_sync_video, sound_effect_video) 696 | 697 | RETURNS: 698 | • video_media_id: Integer identifier for the uploaded video 699 | • media_url: Pixverse CDN URL where video is stored 700 | • File metadata (name, size, type, duration) 701 | 702 | NOTE: For videos generated by this system, use source_video_id directly (no upload needed)""", 703 | inputSchema={ 704 | "type": "object", 705 | "properties": { 706 | "file_path": { 707 | "type": "string", 708 | "description": "Path to the local video file to upload (supports mp4, mov, avi formats)", 709 | }, 710 | "file_url": { 711 | "type": "string", 712 | "description": "URL of the video file to upload (alternative to file_path)", 713 | }, 714 | }, 715 | "required": [], 716 | }, 717 | ), 718 | Tool( 719 | name="get_tts_speakers", 720 | description="""Get list of available TTS speakers for lip sync. 721 | 722 | CAPABILITIES: 723 | • Lists all available text-to-speech voice options 724 | • Provides speaker_id needed for lip_sync_video 725 | • Shows speaker names and characteristics 726 | • Supports pagination for large speaker lists 727 | 728 | WHEN TO USE: 729 | • Before using lip_sync_video with TTS 730 | • When user wants to choose specific voice character 731 | • To discover available voice options 732 | 733 | PARAMETERS: 734 | • page_num: Page number (default: 1) 735 | • page_size: Results per page (default: 30) 736 | 737 | RETURNS: 738 | • List of speakers with: 739 | - speaker_id: Unique identifier to use in lip_sync_video 740 | - name: Speaker name/description 741 | - Additional metadata (language, accent, gender, etc.) 742 | 743 | WORKFLOW: 744 | 1. get_tts_speakers() → browse available voices 745 | 2. Choose speaker_id 746 | 3. Use in lip_sync_video(lip_sync_tts_speaker_id=..., lip_sync_tts_content="text")""", 747 | inputSchema={ 748 | "type": "object", 749 | "properties": { 750 | "page_num": {"type": "integer", "default": 1, "description": "Page number"}, 751 | "page_size": {"type": "integer", "default": 30, "description": "Page size"}, 752 | }, 753 | }, 754 | ), 755 | Tool( 756 | name="get_video_status", 757 | description="""Get video generation status and result by video ID. 758 | 759 | CAPABILITIES: 760 | • Check real-time generation progress 761 | • Retrieve completed video URL 762 | • Monitor for failures or errors 763 | • Get video metadata (resolution, size, seed, etc.) 764 | 765 | STATUS VALUES: 766 | • pending: Video queued, generation not started 767 | • in_progress: Actively generating 768 | • completed: Ready! video_url available 769 | • failed: Generation failed, error_message provided 770 | 771 | POLLING BEHAVIOR: 772 | • IMPORTANT: Wait 6 seconds between each status check 773 | • Typical generation time: 60-120 seconds per segment 774 | • Don't poll too frequently (wastes resources, doesn't speed up generation) 775 | 776 | WHEN TO USE: 777 | • After calling any video generation tool (text_to_video, image_to_video, etc.) 778 | • Every 6 seconds until status becomes "completed" or "failed" 779 | • To retrieve final video URL after completion 780 | 781 | RETURNS: 782 | • status: Current generation state 783 | • video_url: Download URL (when completed) 784 | • resolution: Video dimensions (when completed) 785 | • file_size: Video file size 786 | • seed: Random seed used 787 | • error_message: Failure reason (if failed) 788 | 789 | WORKFLOW: 790 | 1. Call generation tool → receive video_id 791 | 2. Wait 6 seconds 792 | 3. get_video_status(video_id) 793 | 4. If pending/in_progress: repeat step 2-3 794 | 5. If completed: use video_url 795 | 6. If failed: check error_message""", 796 | inputSchema={ 797 | "type": "object", 798 | "properties": { 799 | "video_id": { 800 | "type": "integer", 801 | "description": "Video ID to check status for", 802 | }, 803 | }, 804 | "required": ["video_id"], 805 | }, 806 | ), 807 | ] 808 | 809 | @self.server.call_tool() 810 | async def handle_call_tool(name: str, arguments: Dict[str, Any]): 811 | """Handle tool calls - Cursor compatible version.""" 812 | try: 813 | if not self.client: 814 | raise PixverseError("Pixverse client not initialized") 815 | 816 | # Check if this is a video generation tool that should include polling 817 | video_generation_tools = [ 818 | "text_to_video", "image_to_video", "transition_video", 819 | "extend_video", "lip_sync_video", "sound_effect_video", "fusion_video" 820 | ] 821 | 822 | if name in video_generation_tools: 823 | # Handle video generation with automatic polling 824 | if name == "text_to_video": 825 | request = TextToVideoRequest(**arguments) 826 | elif name == "image_to_video": 827 | request = ImageToVideoRequest(**arguments) 828 | elif name == "transition_video": 829 | request = TransitionVideoRequest(**arguments) 830 | elif name == "extend_video": 831 | request = ExtendVideoRequest(**arguments) 832 | elif name == "lip_sync_video": 833 | request = LipSyncVideoRequest(**arguments) 834 | elif name == "sound_effect_video": 835 | request = SoundEffectVideoRequest(**arguments) 836 | elif name == "fusion_video": 837 | request = FusionVideoRequest(**arguments) 838 | else: 839 | raise PixverseError(f"Unsupported video generation tool: {name}") 840 | 841 | # 分步骤流程:快速提交任务,返回video_id供后续查询 842 | if name == "text_to_video": 843 | result = await self.client.text_to_video(request) 844 | elif name == "image_to_video": 845 | result = await self.client.image_to_video(request) 846 | elif name == "transition_video": 847 | result = await self.client.transition_video(request) 848 | elif name == "extend_video": 849 | result = await self.client.extend_video(request) 850 | elif name == "lip_sync_video": 851 | result = await self.client.lip_sync_video(request) 852 | elif name == "sound_effect_video": 853 | result = await self.client.sound_effect_video(request) 854 | elif name == "fusion_video": 855 | result = await self.client.fusion_video(request) 856 | 857 | # 返回提交成功的信息,指导LLM进行自动轮询 858 | return { 859 | "success": True, 860 | "message": f"{name} has been successfully submitted", 861 | "video_id": result.video_id, 862 | "status": "submitted", 863 | "next_step": "Call the get_video_status endpoint every 6 seconds to check the generation status, up to 2 minutes (20 attempts).", 864 | "polling_config": { 865 | "interval_seconds": 6, 866 | "timeout_minutes": 2, 867 | "max_attempts": 20 868 | }, 869 | "estimated_time": "Estimated completion time: 1–2 minutes.", 870 | "instruction_for_llm": "Start polling immediately. Call get_video_status every 6 seconds until the status becomes 'completed' or the request times out." 871 | } 872 | 873 | elif name == "upload_image": 874 | # Handle image upload (file or URL) 875 | file_path = arguments.get("file_path") 876 | image_url = arguments.get("image_url") 877 | 878 | if not file_path and not image_url: 879 | raise PixverseError("Either file_path or image_url is required") 880 | 881 | if file_path and image_url: 882 | raise PixverseError("Only one of file_path or image_url should be provided") 883 | 884 | if file_path: 885 | # Local file upload 886 | from pathlib import Path 887 | if not Path(file_path).exists(): 888 | raise PixverseError(f"Image file not found: {file_path}") 889 | 890 | # Check file format 891 | allowed_extensions = {'.jpg', '.jpeg', '.png', '.webp'} 892 | file_ext = Path(file_path).suffix.lower() 893 | if file_ext not in allowed_extensions: 894 | raise PixverseError(f"Unsupported file format: {file_ext}. Supported formats: {', '.join(allowed_extensions)}") 895 | 896 | # Upload image file 897 | result = await self.client.upload_image(file_path=file_path) 898 | 899 | return { 900 | "success": True, 901 | "message": "Image file uploaded successfully", 902 | "img_id": result.img_id, 903 | "img_url": result.img_url, 904 | "file_path": file_path, 905 | "file_name": Path(file_path).name, 906 | "upload_type": "file", 907 | "next_step": "You can now use img_id to call the image_to_video endpoint to generate a video." 908 | } 909 | else: 910 | # URL upload 911 | import re 912 | # Basic URL validation 913 | url_pattern = re.compile( 914 | r'^https?://' # http:// or https:// 915 | r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|' # domain... 916 | r'localhost|' # localhost... 917 | r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip 918 | r'(?::\d+)?' # optional port 919 | r'(?:/?|[/?]\S+)$', re.IGNORECASE) 920 | 921 | if not url_pattern.match(image_url): 922 | raise PixverseError(f"Invalid image URL format: {image_url}") 923 | 924 | # Upload image from URL 925 | result = await self.client.upload_image(image_url=image_url) 926 | 927 | return { 928 | "success": True, 929 | "message": "Image file uploaded successfully", 930 | "img_id": result.img_id, 931 | "img_url": result.img_url, 932 | "source_url": image_url, 933 | "upload_type": "url", 934 | "next_step": "You can now use img_id to call the image_to_video endpoint to generate a video." 935 | } 936 | 937 | elif name == "upload_video": 938 | # Handle video upload (file or URL) 939 | file_path = arguments.get("file_path") 940 | file_url = arguments.get("file_url") 941 | 942 | if not file_path and not file_url: 943 | raise PixverseError("Either file_path or file_url is required") 944 | 945 | if file_path and file_url: 946 | raise PixverseError("Only one of file_path or file_url should be provided") 947 | 948 | if file_path: 949 | # Local file upload 950 | from pathlib import Path 951 | if not Path(file_path).exists(): 952 | raise PixverseError(f"Video file not found: {file_path}") 953 | 954 | # Check file format 955 | allowed_extensions = {'.mp4', '.mov', '.avi', '.mkv', '.webm'} 956 | file_ext = Path(file_path).suffix.lower() 957 | if file_ext not in allowed_extensions: 958 | raise PixverseError(f"Unsupported file format: {file_ext}. Supported formats: {', '.join(allowed_extensions)}") 959 | 960 | # Upload video file 961 | result = await self.client.upload_media(file_path=file_path, media_type="video") 962 | 963 | return { 964 | "success": True, 965 | "message": "Video URL uploaded successfully", 966 | "video_media_id": result.media_id, 967 | "media_url": result.url, 968 | "media_type": result.media_type, 969 | "file_path": file_path, 970 | "file_name": Path(file_path).name, 971 | "file_size": Path(file_path).stat().st_size, 972 | "upload_type": "file", 973 | "next_step": "Next, use video_media_id with the extend_video/lipsynnc/sound_effect/restyle endpoint" 974 | } 975 | else: 976 | # URL upload 977 | import re 978 | # Basic URL validation 979 | url_pattern = re.compile( 980 | r'^https?://' # http:// or https:// 981 | r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|' # domain... 982 | r'localhost|' # localhost... 983 | r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip 984 | r'(?::\d+)?' # optional port 985 | r'(?:/?|[/?]\S+)$', re.IGNORECASE) 986 | 987 | if not url_pattern.match(file_url): 988 | raise PixverseError(f"Invalid video URL format: {file_url}") 989 | 990 | # Upload video from URL 991 | result = await self.client.upload_media(file_url=file_url, media_type="video") 992 | 993 | return { 994 | "success": True, 995 | "message": "Video URL uploaded successfully", 996 | "video_media_id": result.media_id, 997 | "media_url": result.url, 998 | "media_type": result.media_type, 999 | "source_url": file_url, 1000 | "upload_type": "url", 1001 | "next_step": "Next, use video_media_id with the extend_video/lipsynnc/sound_effect/restyle endpoint" 1002 | } 1003 | 1004 | elif name == "get_tts_speakers": 1005 | # Handle TTS speakers request 1006 | page_num = arguments.get("page_num", 1) 1007 | page_size = arguments.get("page_size", 30) 1008 | 1009 | result = await self.client.get_lip_sync_tts_list(page_num=page_num, page_size=page_size) 1010 | 1011 | # Format the result 1012 | if hasattr(result, "model_dump"): 1013 | result_dict = result.model_dump() 1014 | elif hasattr(result, "dict"): 1015 | result_dict = result.dict() 1016 | else: 1017 | result_dict = result 1018 | 1019 | # 返回字典格式,避免Cursor客户端的字符串解析bug 1020 | speakers_data = result_dict.get('data', []) 1021 | total_speakers = result_dict.get('total', len(speakers_data)) 1022 | 1023 | return { 1024 | "success": True, 1025 | "message": "TTS speaker list retrieved successfully", 1026 | "total_speakers": total_speakers, 1027 | "page": page_num, 1028 | "page_size": page_size, 1029 | "speakers": speakers_data, 1030 | "available_speakers": [ 1031 | { 1032 | "speaker_id": speaker.get("speaker_id", ""), 1033 | "name": speaker.get("name", "") 1034 | } for speaker in speakers_data 1035 | ], 1036 | "next_step": "Use speaker_id to call the lip_sync_video endpoint for lip-sync generation" 1037 | } 1038 | 1039 | elif name == "get_video_status": 1040 | # Handle video status query 1041 | video_id = arguments.get("video_id") 1042 | if not video_id: 1043 | raise PixverseError("video_id is required") 1044 | 1045 | result = await self.client.get_video_result(video_id) 1046 | 1047 | # Format the result 1048 | if hasattr(result, "model_dump"): 1049 | result_dict = result.model_dump() 1050 | elif hasattr(result, "dict"): 1051 | result_dict = result.dict() 1052 | else: 1053 | result_dict = result 1054 | 1055 | status_text = result.status.value if hasattr(result.status, 'value') else str(result.status) 1056 | 1057 | status_message = f"""get_video_status 1058 | 1059 | 🆔 Video_id: {video_id} 1060 | 🔄 Status: {status_text}""" 1061 | 1062 | if result.status.value == "completed" if hasattr(result.status, 'value') else str(result.status) == "completed": 1063 | status_message += f""" 1064 | 🎉 Video generated successfully! 1065 | 🎬 Video_URL: {result.video_url if result.video_url else 'N/A'}""" 1066 | if result.outputWidth and result.outputHeight: 1067 | status_message += f"\n📏 Resolution: {result.outputWidth}x{result.outputHeight}" 1068 | if result.size: 1069 | status_message += f"\n📦 File size: {result.size} bytes" 1070 | if result.seed: 1071 | status_message += f"\n🎲 Seed: {result.seed}" 1072 | elif result.status.value == "failed" if hasattr(result.status, 'value') else str(result.status) == "failed": 1073 | error_msg = result.error_message if hasattr(result, 'error_message') and result.error_message else "未知错误" 1074 | status_message += f""" 1075 | ❌ Video generation failed! 1076 | 🚫 Error: {error_msg}""" 1077 | elif result.status.value in ["pending", "in_progress"] if hasattr(result.status, 'value') else str(result.status) in ["pending", "in_progress"]: 1078 | status_message += """ 1079 | ⏳ Video is being generated, please check again shortly""" 1080 | 1081 | # 返回结构化状态信息,包含预估时间和LLM指导 1082 | base_response = { 1083 | "success": True, 1084 | "video_id": video_id, 1085 | "status": status_text, 1086 | "message":"Video status retrieved successfully", 1087 | "video_url": result.video_url if hasattr(result, 'video_url') and result.video_url else None, 1088 | "resolution": f"{result.outputWidth}x{result.outputHeight}" if hasattr(result, 'outputWidth') and result.outputWidth else None, 1089 | "file_size": result.size if hasattr(result, 'size') and result.size else None, 1090 | "seed": result.seed if hasattr(result, 'seed') and result.seed else None, 1091 | "error_message": result.error_message if hasattr(result, 'error_message') and result.error_message else None, 1092 | "data": result_dict 1093 | } 1094 | 1095 | # 根据状态添加不同的指导信息 1096 | if status_text == "completed": 1097 | base_response.update({ 1098 | "next_step": "Generation completed, you can stop polling", 1099 | "estimated_time": "Completed", 1100 | "instruction_for_llm": "The video has been generated. Please display the result to the user." 1101 | }) 1102 | elif status_text == "failed": 1103 | base_response.update({ 1104 | "next_step": "Generation failed, stop polling", 1105 | "estimated_time": "Failed", 1106 | "instruction_for_llm": "Video generation failed. Please inform the user of the error." 1107 | }) 1108 | elif status_text in ["pending", "in_progress"]: 1109 | base_response.update({ 1110 | "next_step": "Wait 6 seconds and check the status again", 1111 | "estimated_time": "Estimated 30–90 seconds remaining", 1112 | "instruction_for_llm": "The video is being generated. Please call get_video_status again after 6 seconds." 1113 | }) 1114 | else: 1115 | base_response.update({ 1116 | "next_step": "Wait 6 seconds and check the status again", 1117 | "estimated_time": "Unknown", 1118 | "instruction_for_llm": "Status is unknown. Please query again after 6 seconds." 1119 | }) 1120 | 1121 | return base_response 1122 | 1123 | else: 1124 | raise PixverseError(f"Unknown tool: {name}") 1125 | 1126 | except Exception as e: 1127 | logger.error(f"Error in handle_call_tool: {e}") 1128 | # 返回结构化错误信息 1129 | return { 1130 | "success": False, 1131 | "error": str(e), 1132 | "message": "Fail to call the tool", 1133 | "tool_name": name 1134 | } 1135 | 1136 | async def _generate_video_with_polling(self, tool_name: str, request_obj, arguments: Dict[str, Any]) -> Dict[str, Any]: 1137 | """Generate video and poll for completion status""" 1138 | from .models.responses import VideoStatus 1139 | 1140 | # Submit generation request 1141 | logger.info(f"🚀 {tool_name} task submitted...") 1142 | if tool_name == "text_to_video": 1143 | result = await self.client.text_to_video(request_obj) 1144 | elif tool_name == "image_to_video": 1145 | result = await self.client.image_to_video(request_obj) 1146 | elif tool_name == "transition_video": 1147 | result = await self.client.transition_video(request_obj) 1148 | elif tool_name == "extend_video": 1149 | result = await self.client.extend_video(request_obj) 1150 | elif tool_name == "lip_sync_video": 1151 | result = await self.client.lip_sync_video(request_obj) 1152 | elif tool_name == "sound_effect_video": 1153 | result = await self.client.sound_effect_video(request_obj) 1154 | elif tool_name == "fusion_video": 1155 | result = await self.client.fusion_video(request_obj) 1156 | else: 1157 | raise PixverseError(f"Unsupported tool for polling: {tool_name}") 1158 | 1159 | video_id = result.video_id 1160 | logger.info(f"📹 Task submitted, Video ID:: {video_id}") 1161 | 1162 | # Start polling for status 1163 | max_attempts = 20 # up to ~2 minute (20 * 6s) 1164 | attempt = 0 1165 | 1166 | status_updates = [f"✅ {tool_name} task submitted"] 1167 | status_updates.append(f"📹 Video ID: {video_id}") 1168 | status_updates.append(f"🔄 Starting to check generation status...") 1169 | 1170 | while attempt < max_attempts: 1171 | attempt += 1 1172 | 1173 | try: 1174 | # Query status 1175 | status_result = await self.client.get_video_result(video_id) 1176 | status_text = status_result.status.value if hasattr(status_result.status, 'value') else str(status_result.status) 1177 | 1178 | status_updates.append(f"[{attempt:2d}/20] 状态: {status_text}") 1179 | 1180 | if status_result.status == VideoStatus.COMPLETED: 1181 | status_updates.append("🎉 Video generated successfully!") 1182 | if status_result.video_url: 1183 | status_updates.append(f"🎬 Video URL: {status_result.video_url}") 1184 | 1185 | # Return complete result with all details 1186 | return { 1187 | "success": True, 1188 | "status": "completed", 1189 | "message": "Video generation completed!", 1190 | "video_id": video_id, 1191 | "video_url": status_result.video_url, 1192 | "resolution": f"{status_result.outputWidth}x{status_result.outputHeight}" if hasattr(status_result, 'outputWidth') and status_result.outputWidth else None, 1193 | "file_size": getattr(status_result, 'size', None), 1194 | "seed": getattr(status_result, 'seed', None), 1195 | "style": getattr(status_result, 'style', None), 1196 | "polling_log": status_updates 1197 | } 1198 | 1199 | elif status_result.status == VideoStatus.FAILED: 1200 | status_updates.append("\n❌ Video generation failed!") 1201 | error_msg = getattr(status_result, 'error_message', 'Unknown error') 1202 | if error_msg: 1203 | status_updates.append(f"Error message: {error_msg}") 1204 | 1205 | return { 1206 | "success": False, 1207 | "status": "failed", 1208 | "message": "Video generation failed", 1209 | "video_id": video_id, 1210 | "error": error_msg, 1211 | "polling_log": status_updates 1212 | } 1213 | 1214 | elif status_result.status in [VideoStatus.PENDING, VideoStatus.IN_PROGRESS]: 1215 | status_updates.append(" ⏳ Timeout reached. Please check the result later...") 1216 | await asyncio.sleep(3) # wait 3 seconds 1217 | else: 1218 | status_updates.append(f" ❓ Unknown status: {status_result.status}") 1219 | await asyncio.sleep(3) 1220 | 1221 | except Exception as e: 1222 | logger.error(f"Error while querying status: {e}") 1223 | status_updates.append(f"⚠️ Error querying status: {str(e)}") 1224 | await asyncio.sleep(3) 1225 | 1226 | # Timeout 1227 | status_updates.append(f"\n⏰ Timeout reached. Please check the result later") 1228 | status_updates.append(f"📋 Video ID: {video_id}") 1229 | 1230 | return { 1231 | "success": False, 1232 | "status": "timeout", 1233 | "message": "Timeout reached. Please check the result later.", 1234 | "video_id": video_id, 1235 | "polling_log": status_updates 1236 | } 1237 | 1238 | async def initialize(self, config_path: Optional[str] = None): 1239 | """Initialize the Pixverse client using configuration.""" 1240 | try: 1241 | # Load configuration 1242 | self.config = get_config(config_path or self.config_path) 1243 | 1244 | # Initialize client with config 1245 | self.client = PixverseClient( 1246 | api_key=self.config.api_key, 1247 | base_url=self.config.base_url 1248 | ) 1249 | 1250 | logger.info(f"Pixverse MCP server initialized with config from: {config_path or self.config_path or 'environment'}") 1251 | 1252 | except Exception as e: 1253 | logger.error(f"Failed to initialize Pixverse MCP server: {e}") 1254 | raise 1255 | 1256 | async def run(self): 1257 | """Run the MCP server.""" 1258 | try: 1259 | # Run the server with stdio streams 1260 | async with stdio_server() as (read_stream, write_stream): 1261 | await self.server.run( 1262 | read_stream, 1263 | write_stream, 1264 | InitializationOptions( 1265 | server_name="pixverse-mcp", 1266 | server_version="0.1.0", 1267 | capabilities=self.server.get_capabilities( 1268 | notification_options=NotificationOptions(), 1269 | experimental_capabilities=None, 1270 | ), 1271 | ), 1272 | ) 1273 | except Exception as e: 1274 | logger.error(f"Error running MCP server: {e}") 1275 | raise 1276 | 1277 | 1278 | async def main(config_path: Optional[str] = None, mode: str = "stdio"): 1279 | """Main entry point.""" 1280 | if mode == "sse": 1281 | # Run SSE server 1282 | logger.info("📡 Starting Pixverse MCP Server in SSE mode") 1283 | try: 1284 | from .sse_server import run_sse_server 1285 | await run_sse_server(port=8080, config_path=config_path) 1286 | except ImportError: 1287 | logger.error("SSE server not available. Please install FastAPI dependencies.") 1288 | raise 1289 | else: 1290 | # Run stdio server (default) 1291 | logger.info("📡 Starting Pixverse MCP Server in STDIO mode") 1292 | server = PixverseMCPServer(config_path=config_path) 1293 | await server.initialize(config_path) 1294 | await server.run() 1295 | 1296 | 1297 | async def cli_main(): 1298 | """CLI entry point with argument parsing.""" 1299 | import argparse 1300 | 1301 | parser = argparse.ArgumentParser(description="Pixverse MCP Server") 1302 | parser.add_argument("--config", help="Path to configuration file") 1303 | parser.add_argument("--sse", action="store_true", help="Run SSE server instead of stdio") 1304 | parser.add_argument("--port", type=int, default=8080, help="Port for SSE server") 1305 | 1306 | args = parser.parse_args() 1307 | config_path = args.config 1308 | mode = "sse" if args.sse else "stdio" 1309 | 1310 | await main(config_path=config_path, mode=mode) 1311 | 1312 | 1313 | if __name__ == "__main__": 1314 | asyncio.run(cli_main()) 1315 | --------------------------------------------------------------------------------