├── aafactory ├── src │ ├── __init__.py │ ├── aafactory │ │ ├── __init__.py │ │ ├── act │ │ │ └── interface.py │ │ ├── react │ │ │ └── interface.py │ │ ├── schemas.py │ │ ├── main.py │ │ ├── fetcher │ │ │ └── fetching.py │ │ ├── prompts.py │ │ ├── create_gradio_ui.py │ │ ├── configuration.py │ │ ├── settings.py │ │ ├── utils │ │ │ ├── voice.py │ │ │ └── interface.py │ │ ├── style.py │ │ ├── chat │ │ │ └── interface.py │ │ ├── database │ │ │ └── manage_db.py │ │ ├── avatar │ │ │ └── interface.py │ │ └── comfyui │ │ │ └── video.py │ └── tests │ │ ├── test_chat │ │ ├── test_chat_interface.py │ │ └── test_mock_chat.py │ │ └── test_configuration.py ├── assets │ └── demo │ │ └── avatar.jpg └── workflows │ ├── text_to_speech_with_zonos.json │ └── audio_image_to_video_with_sonic.json ├── .env.sample ├── cloud_setup ├── runpod_templates │ └── comfy_ui │ │ ├── server-requirements.txt │ │ ├── README.md │ │ ├── docker-compose.yml │ │ ├── restore_snapshot.sh │ │ ├── start.sh │ │ ├── Dockerfile │ │ └── 2025-04-26_11-26-04_snapshot.json ├── zonos │ └── pyproject.toml ├── sonic │ ├── install_sonic.sh │ └── pyproject.toml ├── joyvasa │ └── setup_joyvasa.sh └── pyproject.toml ├── github_assets ├── napoleon_example.png └── hpi-logo-white.svg ├── .vscode └── launch.json ├── pyproject.toml ├── LICENSE.md ├── .gitignore └── README.md /aafactory/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aafactory/src/aafactory/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | ELEVENLABS_API_KEY= 3 | VOICE_ID= 4 | COMFYUI_SERVER_IP= 5 | COMFYUI_SERVER_PORT= -------------------------------------------------------------------------------- /cloud_setup/runpod_templates/comfy_ui/server-requirements.txt: -------------------------------------------------------------------------------- 1 | piexif==1.1.3 2 | segment_anything 3 | huggingface_hub -------------------------------------------------------------------------------- /aafactory/assets/demo/avatar.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AA-Factory/aafactory-prototype/HEAD/aafactory/assets/demo/avatar.jpg -------------------------------------------------------------------------------- /github_assets/napoleon_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AA-Factory/aafactory-prototype/HEAD/github_assets/napoleon_example.png -------------------------------------------------------------------------------- /aafactory/src/aafactory/act/interface.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | 4 | def create_act_interface(): 5 | with gr.Blocks() as act: 6 | gr.Markdown("Coming Soon ...") -------------------------------------------------------------------------------- /aafactory/src/aafactory/react/interface.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | def create_react_interface(): 4 | with gr.Blocks() as react: 5 | gr.Markdown("Coming Soon ...") -------------------------------------------------------------------------------- /aafactory/src/aafactory/schemas.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class Settings(BaseModel): 5 | comfy_server_url: str 6 | comfy_server_port: str 7 | openai_api_key: str 8 | elevenlabs_api_key: str 9 | -------------------------------------------------------------------------------- /cloud_setup/runpod_templates/comfy_ui/README.md: -------------------------------------------------------------------------------- 1 | # How to run the docker 2 | 3 | ```bash 4 | docker compose up 5 | ``` 6 | 7 | # How to push the docker image to docker hub 8 | 9 | ```bash 10 | docker push repo_name/aafactory-server:version 11 | ``` -------------------------------------------------------------------------------- /cloud_setup/zonos/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "workspace" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["skip"] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.10" 10 | 11 | [build-system] 12 | requires = ["poetry-core"] 13 | build-backend = "poetry.core.masonry.api" 14 | -------------------------------------------------------------------------------- /aafactory/src/tests/test_chat/test_chat_interface.py: -------------------------------------------------------------------------------- 1 | # Basic Initialization Test 2 | import pytest 3 | from aafactory.chat.interface import create_chat_interface# 4 | 5 | def test_chat_interface_initialization(): 6 | #Test that ChatInterface can be initialized.""" 7 | chat = create_chat_interface() 8 | assert chat is not None -------------------------------------------------------------------------------- /aafactory/src/tests/test_configuration.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | # Create test paths that are separate from production 4 | TEST_DIR = Path(__file__).parent / "test_data" 5 | TEST_DB_PATH = TEST_DIR / "test_avatar_db.json" 6 | TEST_AVATAR_IMAGE_PATH = TEST_DIR / "test_avatar.png" 7 | 8 | # Create test directories if they don't exist 9 | TEST_DIR.mkdir(parents=True, exist_ok=True) 10 | -------------------------------------------------------------------------------- /cloud_setup/runpod_templates/comfy_ui/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | aafactory-server: 3 | build: 4 | context: . 5 | platforms: 6 | - linux/amd64 7 | image: jeney/aafactory-server:2.1 8 | environment: 9 | - SERVE_API_LOCALLY=true 10 | ports: 11 | - "8000:8000" 12 | - "8188:8188" 13 | volumes: 14 | - ./data/comfyui/output:/comfyui/output 15 | - ./data/runpod-volume:/runpod-volume 16 | -------------------------------------------------------------------------------- /aafactory/src/aafactory/main.py: -------------------------------------------------------------------------------- 1 | from aafactory.src.avatar.acting import run_avatar 2 | from aafactory.src.fetcher.fetching import run_fetcher 3 | from aafactory.src.narrator.narration import run_narration 4 | 5 | def main(): 6 | current_environment = run_fetcher(simulation=True) 7 | current_situation = run_narration(current_environment) 8 | avatar_taken_actions = run_avatar(current_situation) 9 | 10 | 11 | if __name__ == "__main__": 12 | main() -------------------------------------------------------------------------------- /cloud_setup/runpod_templates/comfy_ui/restore_snapshot.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | SNAPSHOT_FILE=$(ls /*snapshot*.json 2>/dev/null | head -n 1) 6 | 7 | if [ -z "$SNAPSHOT_FILE" ]; then 8 | echo "runpod-worker-comfy: No snapshot file found. Exiting..." 9 | exit 0 10 | fi 11 | 12 | echo "runpod-worker-comfy: restoring snapshot: $SNAPSHOT_FILE" 13 | 14 | comfy --workspace /comfyui node restore-snapshot "$SNAPSHOT_FILE" --pip-non-url 15 | 16 | echo "runpod-worker-comfy: restored snapshot file: $SNAPSHOT_FILE" -------------------------------------------------------------------------------- /cloud_setup/sonic/install_sonic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | . /venv/bin/activate 3 | pip install poetry 4 | poetry install 5 | 6 | huggingface-cli download LeonJoe13/Sonic --local-dir ./ComfyUI/models/sonic 7 | huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --local-dir ./ComfyUI/models/sonic/stable-video-diffusion-img2vid-xt 8 | huggingface-cli download openai/whisper-tiny --local-dir ./ComfyUI/models/sonic/whisper-tiny 9 | mv ComfyUI/models/sonic/stable-video-diffusion-img2vid-xt/svd_xt.safetensors ComfyUI/models/checkpoints/ 10 | mv ComfyUI/models/sonic/Sonic/* ComfyUI/models/sonic/ -------------------------------------------------------------------------------- /cloud_setup/runpod_templates/comfy_ui/start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Use libtcmalloc for better memory management 4 | TCMALLOC="$(ldconfig -p | grep -Po "libtcmalloc.so.\d" | head -n 1)" 5 | export LD_PRELOAD="${TCMALLOC}" 6 | 7 | # Download models 8 | huggingface-cli download LeonJoe13/Sonic --local-dir ./models/sonic 9 | huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --local-dir ./models/sonic/stable-video-diffusion-img2vid-xt 10 | huggingface-cli download openai/whisper-tiny --local-dir ./models/sonic/whisper-tiny 11 | mv models/sonic/stable-video-diffusion-img2vid-xt/svd_xt.safetensors models/checkpoints/ 12 | mv models/sonic/Sonic/* models/sonic/ 13 | 14 | # Start ComfyUI 15 | python3 main.py --listen 0.0.0.0 16 | -------------------------------------------------------------------------------- /aafactory/src/aafactory/fetcher/fetching.py: -------------------------------------------------------------------------------- 1 | from openai import BaseModel 2 | from openai import OpenAI 3 | import re 4 | from aafactory.database.manage_db import get_settings 5 | 6 | 7 | async def send_request_to_open_ai(messages: list[dict[str, str]]) -> BaseModel: 8 | """ 9 | Send a request to the OpenAI API. 10 | """ 11 | settings = get_settings() 12 | client = OpenAI(api_key=settings.openai_api_key) 13 | response = client.chat.completions.create( 14 | model="gpt-4o", # gpt-4o-mini 15 | messages=messages, 16 | temperature=1.0, 17 | top_p=1.0, 18 | ) 19 | raw_content = response.choices[0].message.content 20 | content_without_double_asterisks = re.sub(r'\*\*', '', raw_content) 21 | return content_without_double_asterisks 22 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python Debugger: Current File", 9 | "type": "debugpy", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal" 13 | }, 14 | { 15 | "name": "Run Gradio UI", 16 | "type": "debugpy", 17 | "request": "launch", 18 | "program": "${workspaceFolder}/aafactory/src/aafactory/create_gradio_ui.py", 19 | "console": "integratedTerminal", 20 | "justMyCode": false 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "aafactory" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Jeremy de Gail "] 6 | readme = "README.md" 7 | packages = [{include = "aafactory", from = "./aafactory/src"}] 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.13" 11 | pydantic = "^2.10.4" 12 | gradio = "^5.9.1" 13 | openai = "^1.58.1" 14 | python-dotenv = "^1.0.1" 15 | llama-cpp-python = "^0.3.5" 16 | loguru = "^0.7.3" 17 | pandas = "^2.2.3" 18 | docstring-parser = "^0.16" 19 | tinydb = "^4.8.2" 20 | line-profiler = "^4.2.0" 21 | matplotlib = "^3.10.0" 22 | soundfile = "^0.13.1" 23 | 24 | 25 | [tool.poetry.group.dev.dependencies] 26 | pytest = "^8.3.5" 27 | pytest-cov = "^6.1.1" 28 | 29 | [build-system] 30 | requires = ["poetry-core"] 31 | build-backend = "poetry.core.masonry.api" 32 | -------------------------------------------------------------------------------- /aafactory/src/aafactory/prompts.py: -------------------------------------------------------------------------------- 1 | AVATAR_DESCRIPTION = """ 2 | Aristotle was transmigrated to the world of the internet. He has a chip in his head that allows him to understand the internet and to be connected to it. 3 | """ 4 | 5 | NEWS_PROMPT = """ 6 | Create fake news. Try to create realistic news. 7 | """ 8 | 9 | SOCIAL_MEDIA_PROMPT = """ 10 | Create fake posts from a social media. Try to create realistic posts. 11 | 12 | Examples: 13 | { 14 | "text": "Just completed a challenging task and feeling great about it!", 15 | "timestamp": "2023-10-01T12:00:00Z", 16 | "user": { 17 | "username": "HappyAvatar", 18 | "description": "Just a virtual being navigating through life.", 19 | "location": "Virtual World", 20 | "followers_count": 150, 21 | "following_count": 50, 22 | "tweet_count": 300 23 | } 24 | """ -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2025 Jeremy de Gail 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /aafactory/src/tests/test_chat/test_mock_chat.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import aafactory.chat.interface as chat_interface 3 | 4 | @pytest.mark.asyncio 5 | async def test_chat_history_clears_on_avatar_change(mocker): 6 | chat_interface.CHAT_HISTORY.clear() 7 | chat_interface.CURRENT_AVATAR = None # Reset avatar tracker 8 | 9 | mocker.patch("aafactory.chat.interface.send_request_to_open_ai", return_value="Hello, user!") 10 | mocker.patch("aafactory.chat.interface.send_request_to_elevenlabs", return_value="mock_audio_path.mp3") 11 | mocker.patch("aafactory.chat.interface.send_request_to_generate_video", return_value="mock_video_path.mp4") 12 | 13 | # First message with Avatar A 14 | await chat_interface.send_request_to_llm( 15 | "avatarA.png", "Hi Avatar A!", "AvatarA", "Friendly", "Knows stuff", 16 | "elevenlabs", "voiceidA", "path/to/recA", "transcriptA", "en" 17 | ) 18 | 19 | assert chat_interface.CHAT_HISTORY == [ 20 | ["Hi Avatar A!", "Hello, user!"], 21 | ] 22 | 23 | # Second message with Avatar B 24 | await chat_interface.send_request_to_llm( 25 | "avatarB.png", "Hello Avatar B!", "AvatarB", "Serious", "Knows more stuff", 26 | "elevenlabs", "voiceidB", "path/to/recB", "transcriptB", "en" 27 | ) 28 | assert chat_interface.CHAT_HISTORY == [ 29 | ["Hello Avatar B!", "Hello, user!"] 30 | ] 31 | -------------------------------------------------------------------------------- /cloud_setup/joyvasa/setup_joyvasa.sh: -------------------------------------------------------------------------------- 1 | # reference: https://github.com/jdh-algo/JoyVASA 2 | 3 | # Install CUDA 4 | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin 5 | mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600 6 | wget https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda-repo-ubuntu2204-12-6-local_12.6.3-560.35.05-1_amd64.deb 7 | dpkg -i cuda-repo-ubuntu2204-12-6-local_12.6.3-560.35.05-1_amd64.deb 8 | cp /var/cuda-repo-ubuntu2204-12-6-local/cuda-*-keyring.gpg /usr/share/keyrings/ 9 | apt-get update 10 | apt-get -y install cuda-toolkit-12-6 11 | 12 | # Install git-lfs 13 | curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash 14 | apt-get install git-lfs 15 | 16 | # Install JoyVASA 17 | git clone https://huggingface.co/jdh-algo/JoyVASA 18 | cd JoyVASA 19 | pip install -r requirements.txt 20 | apt-get update 21 | apt-get install ffmpeg -y 22 | 23 | cd src/utils/dependencies/XPose/models/UniPose/ops 24 | python setup.py build install 25 | cd - 26 | 27 | # Install Chinese-Hubert 28 | git clone https://huggingface.co/TencentGameMate/chinese-hubert-base 29 | cd chinese-hubert-base 30 | cd - 31 | 32 | # Install Wav2Vec2 33 | git lfs install 34 | git clone https://huggingface.co/facebook/wav2vec2-base-960h 35 | cd - 36 | 37 | # Install LivePortrait 38 | huggingface-cli download KwaiVGI/LivePortrait --local-dir pretrained_weights --exclude "*.git*" "README.md" "docs" -------------------------------------------------------------------------------- /aafactory/src/aafactory/create_gradio_ui.py: -------------------------------------------------------------------------------- 1 | from aafactory.configuration import DB_PATH, AVATAR_VOICE_RECORDINGS_PATH 2 | from aafactory.act.interface import create_act_interface 3 | from aafactory.avatar.interface import create_avatar_setup_interface 4 | from aafactory.chat.interface import create_chat_interface 5 | from aafactory.react.interface import create_react_interface 6 | from aafactory.settings import create_settings 7 | from aafactory.style import CSS 8 | from aafactory.utils.interface import create_utils_interface 9 | import gradio as gr 10 | import asyncio 11 | 12 | 13 | async def create_gradio_interface(): 14 | DB_PATH.parent.mkdir(parents=True, exist_ok=True) 15 | if not DB_PATH.exists(): 16 | DB_PATH.touch() 17 | AVATAR_VOICE_RECORDINGS_PATH.mkdir(parents=True, exist_ok=True) 18 | with gr.Blocks() as simulation: 19 | with gr.Tabs(): 20 | with gr.Tab(label="Avatar"): 21 | create_avatar_setup_interface() 22 | with gr.Tab(label="Chat"): 23 | create_chat_interface() 24 | with gr.Tab(label="React"): 25 | create_react_interface() 26 | with gr.Tab(label="Act"): 27 | create_act_interface() 28 | with gr.Tab(label="Utils"): 29 | create_utils_interface() 30 | with gr.Tab(label="Settings"): 31 | create_settings() 32 | return simulation 33 | 34 | 35 | if __name__ == "__main__": 36 | app = asyncio.run(create_gradio_interface()) 37 | app.launch(share=False) 38 | -------------------------------------------------------------------------------- /aafactory/workflows/text_to_speech_with_zonos.json: -------------------------------------------------------------------------------- 1 | { 2 | "12": { 3 | "inputs": { 4 | "audio": "voice.wav" 5 | }, 6 | "class_type": "LoadAudio", 7 | "_meta": { 8 | "title": "LoadAudio" 9 | } 10 | }, 11 | "13": { 12 | "inputs": { 13 | "audio": [ 14 | "24", 15 | 0 16 | ] 17 | }, 18 | "class_type": "PreviewAudio", 19 | "_meta": { 20 | "title": "PreviewAudio" 21 | } 22 | }, 23 | "22": { 24 | "inputs": { 25 | "happy": 1, 26 | "sad": 0, 27 | "disgust": 0, 28 | "fear": 0, 29 | "surprise": 0, 30 | "anger": 0.02, 31 | "other": 0.1, 32 | "neutral": 0.2 33 | }, 34 | "class_type": "ZonosEmotion", 35 | "_meta": { 36 | "title": "Zonos Emotion" 37 | } 38 | }, 39 | "24": { 40 | "inputs": { 41 | "speech": "Hey there! This is pretty cool right?", 42 | "seed": 580, 43 | "model_type": "Zyphra/Zonos-v0.1-transformer", 44 | "language": "en-us", 45 | "cfg_scale": 2, 46 | "min_p": 0.15, 47 | "speed": 1, 48 | "disable_compiler": true, 49 | "sample_text": "No, all that stuff, like you could have all the money in the world, all the followers, everything. It's just doesn't really make you happy.", 50 | "speaker_noised": true, 51 | "sample_audio": [ 52 | "12", 53 | 0 54 | ], 55 | "emotion": [ 56 | "22", 57 | 0 58 | ] 59 | }, 60 | "class_type": "ZonosGenerate", 61 | "_meta": { 62 | "title": "Zonos Generate" 63 | } 64 | } 65 | } -------------------------------------------------------------------------------- /cloud_setup/runpod_templates/comfy_ui/Dockerfile: -------------------------------------------------------------------------------- 1 | # Stage 1: Base image with common dependencies 2 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 3 | 4 | # Prevents prompts from packages asking for user input during installation 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | # Prefer binary wheels over source distributions for faster pip installations 7 | ENV PIP_PREFER_BINARY=1 8 | # Ensures output from python is printed immediately to the terminal without buffering 9 | ENV PYTHONUNBUFFERED=1 10 | # Speed up some cmake builds 11 | ENV CMAKE_BUILD_PARALLEL_LEVEL=8 12 | 13 | # Install Python, git and other necessary tools 14 | RUN apt-get update && apt-get install -y \ 15 | python3.10 \ 16 | python3-pip \ 17 | libglib2.0-0 \ 18 | git \ 19 | wget \ 20 | libgl1 \ 21 | && ln -sf /usr/bin/python3.10 /usr/bin/python \ 22 | && ln -sf /usr/bin/pip3 /usr/bin/pip 23 | 24 | # Clean up to reduce image size 25 | RUN apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/* 26 | 27 | # Install comfy-cli 28 | RUN pip install comfy-cli 29 | 30 | # Install ComfyUI 31 | RUN /usr/bin/yes | comfy --workspace /comfyui install --cuda-version 11.8 --nvidia --version 0.3.30 32 | 33 | # Install runpod 34 | RUN pip install runpod requests uv 35 | 36 | # Go back to the root 37 | WORKDIR / 38 | 39 | # Add scripts 40 | ADD pyproject.toml restore_snapshot.sh ./ 41 | RUN chmod +x /restore_snapshot.sh 42 | # RUN uv sync --no-cache 43 | 44 | # Optionally copy the snapshot file 45 | ADD *snapshot*.json / 46 | 47 | # Install ComfyUI 48 | RUN ./restore_snapshot.sh 49 | 50 | # Change working directory to ComfyUI 51 | WORKDIR /comfyui 52 | 53 | # Configure civitdl with API key 54 | COPY server-requirements.txt /comfyui/server-requirements.txt 55 | COPY start.sh /comfyui/start.sh 56 | RUN pip install -r server-requirements.txt 57 | 58 | RUN chmod +x /comfyui/start.sh 59 | CMD ["/comfyui/start.sh"] -------------------------------------------------------------------------------- /aafactory/src/aafactory/configuration.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import warnings 3 | 4 | warnings.filterwarnings('ignore', message='Video does not have browser-compatible container or codec.*') 5 | warnings.filterwarnings('ignore', message='You have not specified a value for the `type` parameter.*') 6 | 7 | ROOT_DIR = Path(__file__).parent.parent.parent 8 | DB_PATH = ROOT_DIR / "databases" / "avatar_db.json" 9 | GENERATED_VOICE_PATH = ROOT_DIR / "assets/generated_voices/" 10 | GENERATED_VIDEO_PATH = ROOT_DIR / "assets/generated_videos/" 11 | WORKFLOW_FOLDER = ROOT_DIR / "workflows" 12 | DEFAULT_AVATAR_IMAGE_PATH = ROOT_DIR / "assets/demo/avatar.jpg" 13 | DEFAULT_VOICE_RECORDING_PATH = ROOT_DIR / "assets/demo/voice_recording.mp3" 14 | AVATAR_IMAGES_PATH = ROOT_DIR / "assets/avatar_images" 15 | 16 | SETTINGS_TABLE_NAME = "settings" 17 | AVATAR_TABLE_NAME = "avatar" 18 | AVATAR_PAGE_SETTINGS_TABLE_NAME = "avatar_page_settings" 19 | AVATAR_VOICE_RECORDINGS_PATH = ROOT_DIR / "assets/avatar_voice_recordings" 20 | 21 | VOICE_MODELS = ["", "elevenlabs", "zonos"] 22 | VOICE_LANGUAGES = [ 23 | 'af', 'am', 'an', 'ar', 'as', 'az', 'ba', 'bg', 'bn', 'bpy', 'bs', 'ca', 'cmn', 24 | 'cs', 'cy', 'da', 'de', 'el', 'en-029', 'en-gb', 'en-gb-scotland', 'en-gb-x-gbclan', 25 | 'en-gb-x-gbcwmd', 'en-gb-x-rp', 'en-us', 'eo', 'es', 'es-419', 'et', 'eu', 'fa', 26 | 'fa-latn', 'fi', 'fr-be', 'fr-ch', 'fr-fr', 'ga', 'gd', 'gn', 'grc', 'gu', 'hak', 27 | 'hi', 'hr', 'ht', 'hu', 'hy', 'hyw', 'ia', 'id', 'is', 'it', 'ja', 'jbo', 'ka', 28 | 'kk', 'kl', 'kn', 'ko', 'kok', 'ku', 'ky', 'la', 'lfn', 'lt', 'lv', 'mi', 'mk', 29 | 'ml', 'mr', 'ms', 'mt', 'my', 'nb', 'nci', 'ne', 'nl', 'om', 'or', 'pa', 'pap', 30 | 'pl', 'pt', 'pt-br', 'py', 'quc', 'ro', 'ru', 'ru-lv', 'sd', 'shn', 'si', 'sk', 31 | 'sl', 'sq', 'sr', 'sv', 'sw', 'ta', 'te', 'tn', 'tr', 'tt', 'ur', 'uz', 'vi', 32 | 'vi-vn-x-central', 'vi-vn-x-south', 'yue' 33 | ] 34 | 35 | TEXT_TO_SPEECH_WITH_ZONOS_WORKFLOW_PATH = ROOT_DIR / "workflows" / "text_to_speech_with_zonos.json" -------------------------------------------------------------------------------- /aafactory/src/aafactory/settings.py: -------------------------------------------------------------------------------- 1 | from aafactory.configuration import DB_PATH 2 | from aafactory.schemas import Settings 3 | from dotenv import load_dotenv 4 | import gradio as gr 5 | import os 6 | from loguru import logger 7 | from tinydb import TinyDB 8 | 9 | load_dotenv() 10 | 11 | def create_settings(): 12 | with gr.Blocks() as settings: 13 | gr.Markdown("## Settings") 14 | with gr.Accordion("Comfy UI", open=False): 15 | comfy_server_url = gr.Textbox(label="ComfyUI Server URL", value=os.getenv("COMFYUI_SERVER_URL"), interactive=True) 16 | comfy_server_port = gr.Textbox(label="ComfyUI Server Port", value=os.getenv("COMFYUI_SERVER_PORT"), interactive=True) 17 | with gr.Accordion("ElevenLabs", open=False): 18 | elevenlabs_api_key = gr.Textbox(label="ElevenLabs API Key", value=os.getenv("ELEVENLABS_API_KEY"), interactive=True) 19 | with gr.Accordion("LLM", open=False): 20 | openai_api_key = gr.Textbox(label="OpenAI API Key", value=os.getenv("OPENAI_API_KEY"), interactive=True) 21 | 22 | submit_btn = gr.Button("Save Settings") 23 | submit_btn.click( 24 | fn=_save_settings_to_db, 25 | inputs=[comfy_server_url, comfy_server_port, openai_api_key, elevenlabs_api_key], 26 | outputs=[comfy_server_url, comfy_server_port, openai_api_key, elevenlabs_api_key] 27 | ) 28 | settings.load( 29 | fn=_load_settings_from_db, 30 | outputs=[comfy_server_url, comfy_server_port, openai_api_key, elevenlabs_api_key] 31 | ) 32 | 33 | def _save_settings_to_db(*args): 34 | settings = Settings( 35 | comfy_server_url=args[0], 36 | comfy_server_port=args[1], 37 | openai_api_key=args[2], 38 | elevenlabs_api_key=args[3] 39 | ) 40 | db = TinyDB(DB_PATH) 41 | db.table("settings").truncate() 42 | db.table("settings").insert(settings.model_dump()) 43 | logger.success("Settings saved") 44 | settings_dict = settings.model_dump() 45 | return [settings_dict[key] for key in [ 46 | 'comfy_server_url', 'comfy_server_port', 'openai_api_key', 47 | 'elevenlabs_api_key' 48 | ]] 49 | 50 | def _load_settings_from_db(): 51 | db = TinyDB(DB_PATH) 52 | settings_dict = db.table("settings").get(doc_id=1) 53 | if settings_dict is None: 54 | return [None, None, None, None] 55 | return [settings_dict[key] for key in [ 56 | 'comfy_server_url', 'comfy_server_port', 'openai_api_key', 57 | 'elevenlabs_api_key' 58 | ]] -------------------------------------------------------------------------------- /aafactory/workflows/audio_image_to_video_with_sonic.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": { 3 | "inputs": { 4 | "sonic_unet": "unet.pth", 5 | "ip_audio_scale": 1, 6 | "use_interframe": true, 7 | "dtype": "bf16", 8 | "model": [ 9 | "5", 10 | 0 11 | ] 12 | }, 13 | "class_type": "SONICTLoader", 14 | "_meta": { 15 | "title": "SONICTLoader" 16 | } 17 | }, 18 | "2": { 19 | "inputs": { 20 | "seed": 1454942941, 21 | "inference_steps": 25, 22 | "dynamic_scale": 1, 23 | "fps": 25, 24 | "model": [ 25 | "1", 26 | 0 27 | ], 28 | "data_dict": [ 29 | "6", 30 | 0 31 | ] 32 | }, 33 | "class_type": "SONICSampler", 34 | "_meta": { 35 | "title": "SONICSampler" 36 | } 37 | }, 38 | "5": { 39 | "inputs": { 40 | "ckpt_name": "svd_xt.safetensors" 41 | }, 42 | "class_type": "ImageOnlyCheckpointLoader", 43 | "_meta": { 44 | "title": "Image Only Checkpoint Loader (img2vid model)" 45 | } 46 | }, 47 | "6": { 48 | "inputs": { 49 | "min_resolution": 320, 50 | "duration": 3.8000000000000003, 51 | "expand_ratio": 0.5, 52 | "clip_vision": [ 53 | "5", 54 | 1 55 | ], 56 | "vae": [ 57 | "5", 58 | 2 59 | ], 60 | "audio": [ 61 | "9", 62 | 0 63 | ], 64 | "image": [ 65 | "7", 66 | 0 67 | ], 68 | "weight_dtype": [ 69 | "1", 70 | 1 71 | ] 72 | }, 73 | "class_type": "SONIC_PreData", 74 | "_meta": { 75 | "title": "SONIC_PreData" 76 | } 77 | }, 78 | "7": { 79 | "inputs": { 80 | "image": "avatar.jpg", 81 | "upload": "image" 82 | }, 83 | "class_type": "LoadImage", 84 | "_meta": { 85 | "title": "Load Image" 86 | } 87 | }, 88 | "8": { 89 | "inputs": { 90 | "frame_rate": [ 91 | "2", 92 | 1 93 | ], 94 | "loop_count": 0, 95 | "filename_prefix": "AnimateDiff", 96 | "format": "video/h265-mp4", 97 | "pix_fmt": "yuv420p", 98 | "crf": 22, 99 | "save_metadata": false, 100 | "pingpong": false, 101 | "save_output": true, 102 | "images": [ 103 | "2", 104 | 0 105 | ], 106 | "audio": [ 107 | "9", 108 | 0 109 | ] 110 | }, 111 | "class_type": "VHS_VideoCombine", 112 | "_meta": { 113 | "title": "Video Combine 🎥🅥🅗🅢" 114 | } 115 | }, 116 | "9": { 117 | "inputs": { 118 | "audio": "df8f1d5e094e4614a69044a4f191b3f6.mp3" 119 | }, 120 | "class_type": "LoadAudio", 121 | "_meta": { 122 | "title": "LoadAudio" 123 | } 124 | } 125 | } -------------------------------------------------------------------------------- /cloud_setup/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "workspace" 3 | version = "0.1.0" 4 | description = "" 5 | authors = [{ name = "Reekomer" }] 6 | readme = "README.md" 7 | requires-python = ">=3.10" 8 | dependencies = [ 9 | "torch==2.4.0", 10 | "torchaudio==2.4.0", 11 | "torchvision==0.19.0", 12 | "accelerate==0.33.0", 13 | "aiohttp>=3.10.7", 14 | "aiosignal==1.3.1", 15 | "anyio==4.2.0", 16 | "argon2-cffi==23.1.0", 17 | "argon2-cffi-bindings==21.2.0", 18 | "arrow==1.3.0", 19 | "asttokens==2.4.1", 20 | "async-lru==2.0.4", 21 | "async-timeout==4.0.3", 22 | "attrs==23.2.0", 23 | "babel==2.14.0", 24 | "beautifulsoup4==4.12.3", 25 | "bleach==6.1.0", 26 | "build==1.2.1", 27 | "cachecontrol==0.14.0", 28 | "certifi==2022.12.7", 29 | "cffi==1.16.0", 30 | "charset-normalizer==2.1.1", 31 | "cleo==2.1.0", 32 | "cmake==3.25.0", 33 | "comm==0.2.1", 34 | "crashtest==0.4.1", 35 | "cryptography==43.0.0", 36 | "debugpy==1.8.0", 37 | "decorator==5.1.1", 38 | "defusedxml==0.7.1", 39 | "distlib==0.3.8", 40 | "dulwich==0.21.7", 41 | "einops==0.7.0", 42 | "entrypoints==0.4", 43 | "exceptiongroup==1.2.0", 44 | "executing==2.0.1", 45 | "fastjsonschema==2.19.1", 46 | "filelock==3.15.4", 47 | "fqdn==1.5.1", 48 | "frozenlist==1.4.1", 49 | "fsspec==2024.2.0", 50 | "gdown==5.1.0", 51 | "gitdb==4.0.11", 52 | "gitpython==3.1.41", 53 | "h11==0.14.0", 54 | "httpcore==1.0.2", 55 | "httpx==0.26.0", 56 | "huggingface-hub==0.24.5", 57 | "idna==3.4", 58 | "importlib-metadata==8.2.0", 59 | "installer==0.7.0", 60 | "isoduration==20.11.0", 61 | "jaraco-classes==3.4.0", 62 | "jedi==0.19.1", 63 | "jeepney==0.8.0", 64 | "jinja2==3.1.2", 65 | "json5==0.9.14", 66 | "jsonpointer==2.4", 67 | "jsonschema==4.21.1", 68 | "jsonschema-specifications==2023.12.1", 69 | "keyring==24.3.1", 70 | "lit==15.0.7", 71 | "lxml==5.1.0", 72 | "markupsafe==2.1.3", 73 | "matplotlib-inline==0.1.6", 74 | "matrix-client==0.4.0", 75 | "mistune==3.0.2", 76 | "more-itertools==10.4.0", 77 | "mpmath==1.3.0", 78 | "msgpack==1.0.8", 79 | "multidict==6.0.5", 80 | "nbclassic==1.0.0", 81 | "nbclient==0.9.0", 82 | "nbconvert==7.15.0", 83 | "nbformat==5.9.2", 84 | "nest-asyncio==1.6.0", 85 | "networkx==3.2.1", 86 | "numpy==1.26.3", 87 | "overrides==7.7.0", 88 | "packaging==23.2", 89 | "pandocfilters==1.5.1", 90 | "parso==0.8.3", 91 | "pexpect==4.9.0", 92 | "pillow==10.2.0", 93 | "pkginfo==1.11.1", 94 | "platformdirs==4.2.0", 95 | "poetry-core==1.9.0", 96 | "prometheus-client==0.19.0", 97 | "prompt-toolkit==3.0.43", 98 | "protobuf==5.27.3", 99 | "psutil==5.9.8", 100 | "ptyprocess==0.7.0", 101 | "pure-eval==0.2.2", 102 | "pycparser==2.21", 103 | "pygments==2.17.2", 104 | "pysocks==1.7.1", 105 | "python-dateutil==2.8.2", 106 | "pyyaml==6.0.1", 107 | "pyzmq==24.0.1", 108 | "scipy==1.12.0", 109 | "torch==2.4.0", 110 | "transformers>=4.48.1", 111 | "triton==3.0.0", 112 | "xformers==0.0.27.post2", 113 | "torchsde>=0.2.6", 114 | "opencv-python>=4.11.0.86", 115 | "jupyterlab-server==2.25.2", 116 | "jupyterlab==4.1.0", 117 | "omegaconf>=2.3.0", 118 | "diffusers>=0.32.2", 119 | "imageio>=2.37.0", 120 | "kanjize>=1.5.0", 121 | "soundfile>=0.13.1", 122 | "kornia>=0.8.0", 123 | "av>=14.2.0", 124 | "spandrel>=0.4.1", 125 | 126 | ] 127 | 128 | [build-system] 129 | requires = ["setuptools", "wheel"] 130 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | *.DS_Store 7 | 8 | # C extensions 9 | *.so 10 | 11 | streaming_response_time_test.txt 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/#use-with-ide 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | /env 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | 159 | # PyCharm 160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 162 | # and can be added to the global gitignore or merged into this file. For a more nuclear 163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 164 | .idea/ 165 | .gradio/ 166 | *.lprof 167 | 168 | # output files that are generated by the workflows and saved locally. 169 | avatar_images/* 170 | avatar_voice_recordings/* 171 | generated_videos/* 172 | generated_voices/* 173 | 174 | aafactory/databases/* -------------------------------------------------------------------------------- /aafactory/src/aafactory/utils/voice.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | import uuid 4 | from aafactory.comfyui.video import QueueHistory, queue_task, upload_files_to_comfyui_server 5 | from aafactory.configuration import DB_PATH, TEXT_TO_SPEECH_WITH_ZONOS_WORKFLOW_PATH, GENERATED_VOICE_PATH 6 | from aafactory.database.manage_db import get_settings 7 | from aafactory.schemas import Settings 8 | from loguru import logger 9 | import requests 10 | from tinydb import TinyDB 11 | 12 | async def send_request_to_elevenlabs(prompt: str, voice_id: str) -> Path: 13 | # Get API key from settings 14 | db = TinyDB(DB_PATH) 15 | settings = db.table("settings").all()[-1] 16 | api_key = settings["elevenlabs_api_key"] 17 | voice_id = voice_id 18 | url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" 19 | 20 | headers = { 21 | "Accept": "audio/mpeg", 22 | "Content-Type": "application/json", 23 | "xi-api-key": api_key 24 | } 25 | 26 | data = { 27 | "text": prompt, 28 | "model_id": "eleven_multilingual_v2", 29 | "voice_settings": { 30 | "stability": 0.5, 31 | "similarity_boost": 0.5 32 | } 33 | } 34 | 35 | try: 36 | response = requests.post(url, json=data, headers=headers) 37 | response.raise_for_status() 38 | 39 | # Save the audio file 40 | output_path = GENERATED_VOICE_PATH / f"{uuid.uuid4().hex}.mp3" 41 | output_path.parent.mkdir(parents=True, exist_ok=True) 42 | with open(output_path, "wb") as f: 43 | f.write(response.content) 44 | 45 | return output_path 46 | 47 | except requests.exceptions.RequestException as e: 48 | logger.error(f"Error making request to ElevenLabs: {e}") 49 | raise e 50 | 51 | 52 | async def send_request_to_zonos(text_response: str, voice_language: str, voice_recording_path: str, audio_transcript: str) -> Path: 53 | """ 54 | Send a request to the server to generate a video. 55 | """ 56 | settings = get_settings() 57 | voice_recording_path = Path(voice_recording_path) 58 | upload_files_to_comfyui_server([voice_recording_path]) 59 | workflow = _create_text_to_speech_with_zonos_workflow(text_response, voice_language, voice_recording_path, audio_transcript) 60 | queue_history = await queue_task(workflow, settings) 61 | audio_url = _get_audio_url(settings, queue_history) 62 | output_path = _save_audio_to_file(audio_url) 63 | return output_path 64 | 65 | 66 | def _create_text_to_speech_with_zonos_workflow(text_response: str, voice_language: str, voice_recording_path: Path, audio_transcript: str) -> dict: 67 | """ 68 | Create a workflow for the text to speech with Zonos. 69 | """ 70 | with open(TEXT_TO_SPEECH_WITH_ZONOS_WORKFLOW_PATH, "r") as f: 71 | workflow = json.load(f) 72 | workflow["12"]["inputs"]["audio"] = voice_recording_path.name 73 | workflow["24"]["inputs"]["speech"] = text_response 74 | workflow["24"]["inputs"]["language"] = voice_language 75 | workflow["24"]["inputs"]["sample_text"] = audio_transcript 76 | return {"prompt": workflow} 77 | 78 | 79 | def _get_audio_url(settings: Settings, queue_history: QueueHistory) -> str: 80 | """ 81 | Get the audio URL from the history response. 82 | """ 83 | output_info = queue_history.response.get(queue_history.prompt_id, {}).get('outputs', {}).get('13', {}).get('audio', [{}])[0] 84 | filename = output_info.get('filename', 'unknown.mp3') 85 | output_url = f"{settings.comfy_server_url}/api/view?filename={filename}&subfolder=&type=temp" 86 | logger.success(f"Output URL: {output_url}") 87 | return output_url 88 | 89 | 90 | def _save_audio_to_file(audio_url: str) -> Path: 91 | """ 92 | Save the audio to a file. 93 | """ 94 | output_path = GENERATED_VOICE_PATH / f"{uuid.uuid4().hex}.mp3" 95 | output_path.parent.mkdir(parents=True, exist_ok=True) 96 | response = requests.get(audio_url) 97 | with open(output_path, "wb") as f: 98 | f.write(response.content) 99 | logger.success(f"Audio saved to {output_path}") 100 | return output_path -------------------------------------------------------------------------------- /aafactory/src/aafactory/utils/interface.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import uuid 3 | from aafactory.comfyui.video import send_request_to_generate_video 4 | from aafactory.configuration import AVATAR_PAGE_SETTINGS_TABLE_NAME, DB_PATH, DEFAULT_AVATAR_IMAGE_PATH, GENERATED_VOICE_PATH, VOICE_MODELS 5 | from aafactory.database.manage_db import AVATAR_TABLE_NAME 6 | from aafactory.utils.voice import send_request_to_elevenlabs 7 | import gradio as gr 8 | from tinydb import TinyDB 9 | import soundfile as sf 10 | 11 | 12 | def create_utils_interface(): 13 | with gr.Blocks() as utils: 14 | with gr.Accordion("Audio to Video", open=False): 15 | with gr.Row(): 16 | with gr.Column(): 17 | audio_avatar_image = gr.Textbox(value=DEFAULT_AVATAR_IMAGE_PATH, visible=False) 18 | audio_avatar_animation = gr.Video(value=DEFAULT_AVATAR_IMAGE_PATH, autoplay=True) 19 | with gr.Column(): 20 | audio_file = gr.Audio(label="Audio File") 21 | btn_generate_video = gr.Button("Generate Video") 22 | btn_generate_video.click(fn=_generate_video_from_audio, inputs=[audio_file, audio_avatar_image], outputs=[audio_avatar_animation]) 23 | with gr.Accordion("Script to Video", open=False): 24 | with gr.Row(): 25 | with gr.Column(): 26 | script_avatar_image = gr.Textbox(value=DEFAULT_AVATAR_IMAGE_PATH, visible=False) 27 | script_avatar_animation = gr.Video(value=DEFAULT_AVATAR_IMAGE_PATH, autoplay=True) 28 | with gr.Column(): 29 | gr.Markdown("### Avatar Script") 30 | avatar_script = gr.TextArea() 31 | gr.Markdown("### Voice Model") 32 | voice_model = gr.Dropdown(choices=VOICE_MODELS, value="elevenlabs", interactive=True, info="Select the voice model you want to use") 33 | gr.Markdown("### Voice ID") 34 | voice_id = gr.Textbox(show_label=False, interactive=True, info="Enter the voice id you want to use") 35 | btn_generate_video = gr.Button("Generate Video") 36 | btn_generate_video.click(fn=_generate_video_from_script, inputs=[avatar_script, script_avatar_image, voice_model, voice_id], outputs=[script_avatar_animation]) 37 | 38 | 39 | utils.load( 40 | fn=_load_avatar_infos_for_chat, 41 | outputs=[audio_avatar_animation, audio_avatar_image, script_avatar_animation, script_avatar_image] 42 | ) 43 | return utils 44 | 45 | 46 | async def _generate_video_from_audio(audio_file_bytes: bytes, avatar_image_str: bytes) -> str: 47 | # Save the audio file 48 | sample_rate, audio_data = audio_file_bytes # Unpack the tuple 49 | audio_file_path = GENERATED_VOICE_PATH / f"{uuid.uuid4().hex}.mp3" 50 | audio_file_path.parent.mkdir(parents=True, exist_ok=True) 51 | 52 | # Normalize audio data to float32 between -1 and 1 53 | audio_data = audio_data.astype('float32') 54 | if audio_data.max() > 1.0 or audio_data.min() < -1.0: 55 | audio_data = audio_data / max(abs(audio_data.max()), abs(audio_data.min())) 56 | 57 | # Save using soundfile with proper settings 58 | sf.write( 59 | str(audio_file_path), 60 | audio_data, 61 | sample_rate, 62 | format='MP3' 63 | ) 64 | 65 | avatar_image_path = Path(avatar_image_str) 66 | video_response = await send_request_to_generate_video(avatar_image_path, audio_file_path) 67 | return video_response 68 | 69 | 70 | async def _generate_video_from_script(avatar_script: str, avatar_image_str: str, voice_model: str, voice_id: str) -> str: 71 | if voice_model == "elevenlabs": 72 | audio_response = await send_request_to_elevenlabs(avatar_script, voice_id) 73 | video_response = await send_request_to_generate_video(avatar_image_str, audio_response) 74 | return video_response 75 | 76 | 77 | def _load_avatar_infos_for_chat() -> str: 78 | db = TinyDB(DB_PATH) 79 | table = db.table(AVATAR_TABLE_NAME) 80 | avatar_page_settings_table = db.table(AVATAR_PAGE_SETTINGS_TABLE_NAME) 81 | avatar_page_settings = avatar_page_settings_table.get(doc_id=1) # Changed from 0 to 1 since TinyDB starts at 1 82 | avatar_name = avatar_page_settings.get("avatar_name") 83 | avatar_info = table.get(lambda x: x.get("name") == avatar_name) 84 | if avatar_info: 85 | return ( 86 | avatar_info.get("avatar_image_path", ""), 87 | avatar_info.get("avatar_image_path", ""), 88 | avatar_info.get("avatar_image_path", ""), 89 | avatar_info.get("avatar_image_path", "") 90 | ) 91 | return DEFAULT_AVATAR_IMAGE_PATH, DEFAULT_AVATAR_IMAGE_PATH, DEFAULT_AVATAR_IMAGE_PATH, DEFAULT_AVATAR_IMAGE_PATH -------------------------------------------------------------------------------- /cloud_setup/sonic/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "workspace" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["None"] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.10" 10 | torch = "^2.4.0" 11 | torchaudio = "^2.4.0" 12 | torchvision = "^0.19.0" 13 | accelerate = "0.33.0" 14 | aiohttp = "3.9.3" 15 | aiosignal = "1.3.1" 16 | anyio = "4.2.0" 17 | argon2-cffi = "23.1.0" 18 | argon2-cffi-bindings = "21.2.0" 19 | arrow = "1.3.0" 20 | asttokens = "2.4.1" 21 | async-lru = "2.0.4" 22 | async-timeout = "4.0.3" 23 | attrs = "23.2.0" 24 | babel = "2.14.0" 25 | beautifulsoup4 = "4.12.3" 26 | bleach = "6.1.0" 27 | build = "1.2.1" 28 | cachecontrol = "0.14.0" 29 | certifi = "2022.12.7" 30 | cffi = "1.16.0" 31 | charset-normalizer = "2.1.1" 32 | cleo = "2.1.0" 33 | cmake = "3.25.0" 34 | comm = "0.2.1" 35 | crashtest = "0.4.1" 36 | cryptography = "43.0.0" 37 | debugpy = "1.8.0" 38 | decorator = "5.1.1" 39 | defusedxml = "0.7.1" 40 | distlib = "0.3.8" 41 | dulwich = "0.21.7" 42 | einops = "0.7.0" 43 | entrypoints = "0.4" 44 | exceptiongroup = "1.2.0" 45 | executing = "2.0.1" 46 | fastjsonschema = "2.19.1" 47 | filelock = "3.15.4" 48 | fqdn = "1.5.1" 49 | frozenlist = "1.4.1" 50 | fsspec = "2024.2.0" 51 | gdown = "5.1.0" 52 | gitdb = "4.0.11" 53 | gitpython = "3.1.41" 54 | h11 = "0.14.0" 55 | httpcore = "1.0.2" 56 | httpx = "0.26.0" 57 | huggingface-hub = "0.24.5" 58 | idna = "3.4" 59 | importlib-metadata = "8.2.0" 60 | installer = "0.7.0" 61 | ipykernel = "6.29.1" 62 | ipython = "8.21.0" 63 | ipython-genutils = "0.2.0" 64 | ipywidgets = "8.1.1" 65 | isoduration = "20.11.0" 66 | jaraco-classes = "3.4.0" 67 | jedi = "0.19.1" 68 | jeepney = "0.8.0" 69 | jinja2 = "3.1.2" 70 | json5 = "0.9.14" 71 | jsonpointer = "2.4" 72 | jsonschema = "4.21.1" 73 | jsonschema-specifications = "2023.12.1" 74 | jupyter-archive = "3.4.0" 75 | jupyter-events = "0.9.0" 76 | jupyter-highlight-selected-word = "0.2.0" 77 | jupyter-lsp = "2.2.2" 78 | jupyter-nbextensions-configurator = "0.6.3" 79 | jupyter-client = "7.4.9" 80 | jupyter-contrib-core = "0.4.2" 81 | jupyter-contrib-nbextensions = "0.7.0" 82 | jupyter-core = "5.7.1" 83 | jupyter-server = "2.12.5" 84 | jupyter-server-terminals = "0.5.2" 85 | jupyterlab = "4.1.0" 86 | jupyterlab-widgets = "3.0.9" 87 | jupyterlab-pygments = "0.3.0" 88 | jupyterlab-server = "2.25.2" 89 | keyring = "24.3.1" 90 | kornia = "0.7.3" 91 | kornia-rs = "0.1.5" 92 | lit = "15.0.7" 93 | lxml = "5.1.0" 94 | markupsafe = "2.1.3" 95 | matplotlib-inline = "0.1.6" 96 | matrix-client = "0.4.0" 97 | mistune = "3.0.2" 98 | more-itertools = "10.4.0" 99 | mpmath = "1.3.0" 100 | msgpack = "1.0.8" 101 | multidict = "6.0.5" 102 | nbclassic = "1.0.0" 103 | nbclient = "0.9.0" 104 | nbconvert = "7.15.0" 105 | nbformat = "5.9.2" 106 | nest-asyncio = "1.6.0" 107 | networkx = "3.2.1" 108 | notebook = "6.5.5" 109 | notebook-shim = "0.2.3" 110 | numpy = "1.26.3" 111 | overrides = "7.7.0" 112 | packaging = "23.2" 113 | pandocfilters = "1.5.1" 114 | parso = "0.8.3" 115 | pexpect = "4.9.0" 116 | pillow = "10.2.0" 117 | pkginfo = "1.11.1" 118 | platformdirs = "4.2.0" 119 | poetry = "1.8.3" 120 | poetry-core = "1.9.0" 121 | poetry-plugin-export = "1.8.0" 122 | prometheus-client = "0.19.0" 123 | prompt-toolkit = "3.0.43" 124 | protobuf = "5.27.3" 125 | psutil = "5.9.8" 126 | ptyprocess = "0.7.0" 127 | pure-eval = "0.2.2" 128 | pycparser = "2.21" 129 | pygments = "2.17.2" 130 | pyproject-hooks = "1.1.0" 131 | pysocks = "1.7.1" 132 | python-dateutil = "2.8.2" 133 | python-json-logger = "2.0.7" 134 | pyyaml = "6.0.1" 135 | pyzmq = "24.0.1" 136 | rapidfuzz = "3.9.6" 137 | referencing = "0.33.0" 138 | regex = "2023.12.25" 139 | requests = "2.31.0" 140 | requests-toolbelt = "1.0.0" 141 | rfc3339-validator = "0.1.4" 142 | rfc3986-validator = "0.1.1" 143 | rpds-py = "0.17.1" 144 | safetensors = "0.4.2" 145 | scipy = "1.12.0" 146 | secretstorage = "3.3.3" 147 | send2trash = "1.8.2" 148 | sentencepiece = "0.2.0" 149 | shellingham = "1.5.4" 150 | six = "1.16.0" 151 | smmap = "5.0.1" 152 | sniffio = "1.3.0" 153 | soundfile = "0.12.1" 154 | soupsieve = "2.5" 155 | spandrel = "0.3.4" 156 | stack-data = "0.6.3" 157 | sympy = "1.12" 158 | terminado = "0.18.0" 159 | timm = "1.0.8" 160 | tinycss2 = "1.2.1" 161 | tokenizers = "0.15.1" 162 | tomli = "2.0.1" 163 | tomlkit = "0.13.0" 164 | torchsde = "0.2.6" 165 | tornado = "6.4" 166 | tqdm = "4.66.1" 167 | traitlets = "5.14.1" 168 | trampoline = "0.1.2" 169 | transformers = "4.37.2" 170 | triton = "^3.0.0" 171 | trove-classifiers = "2024.7.2" 172 | types-python-dateutil = "2.8.19.20240106" 173 | typing-extensions = "4.8.0" 174 | uri-template = "1.3.0" 175 | urllib3 = "1.26.13" 176 | virtualenv = "20.26.3" 177 | wcwidth = "0.2.13" 178 | webcolors = "1.13" 179 | webencodings = "0.5.1" 180 | websocket-client = "1.7.0" 181 | widgetsnbextension = "4.0.9" 182 | xformers = "^0.0.27.post2" 183 | yarl = "1.9.4" 184 | zipp = "3.19.2" 185 | insightface = "^0.7.3" 186 | 187 | 188 | [build-system] 189 | requires = ["poetry-core"] 190 | build-backend = "poetry.core.masonry.api" 191 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI Avatar Factory 2 | 3 | ⚡ AI Avatar Factory is an interface for creating and managing AI avatars. ⚡ 4 | 5 | [![Website](https://img.shields.io/badge/website-000000?style=for-the-badge&logo=AAFactory.xyz&logoColor=white 6 | )](https://aafactory.xyz/) 7 | [![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/C2Rjy8Q2ER) 8 | 9 | ![AAFactory Screenshot](https://github.com/Reekomer/aafactory/blob/main/github_assets/napoleon_example.png?raw=true) 10 | 11 | 12 | # IMPORTANT 13 | - we are building a better UI for the project in here: https://github.com/orgs/AA-Factory/repositories. 14 | This repository is being deprecated. 15 | 16 | ## File Tree 17 | 18 | ```bash 19 | ├── LICENSE.md 20 | ├── README.md 21 | ├── aafactory 22 | │   ├── assets 23 | │   │   ├── avatar_images 24 | │   │   │   └── ...png 25 | │   │   ├── avatar_voice_recordings 26 | │   │   │   └── ...wav 27 | │   │   ├── demo 28 | │   │   │   ├── avatar.jpg 29 | │   │   │   ├── avatar.mp4 30 | │   │   │   └── voice_recording.mp3 31 | │   │   ├── generated_video 32 | │   │   ├── generated_videos 33 | │   │   │   └── ...mp4 34 | │   │   ├── generated_voice 35 | │   │   │   └── ...mp3 36 | │   │   └── generated_voices 37 | │   │   └── ...mp3 38 | │   ├── databases 39 | │   │   └── avatar_db.json 40 | │   ├── src 41 | │   │   ├── __init__.py 42 | │   │   └── aafactory 43 | │   │   ├── __init__.py 44 | │   │   ├── act 45 | │   │   │   └── interface.py 46 | │   │   ├── avatar 47 | │   │   │   └── interface.py 48 | │   │   ├── chat 49 | │   │   │   └── interface.py 50 | │   │   ├── comfyui 51 | │   │   │   └── video.py 52 | │   │   ├── configuration.py 53 | │   │   ├── create_gradio_ui.py 54 | │   │   ├── database 55 | │   │   │   └── manage_db.py 56 | │   │   ├── fetcher 57 | │   │   │   ├── environment_objects.py 58 | │   │   │   └── fetching.py 59 | │   │   ├── main.py 60 | │   │   ├── prompts.py 61 | │   │   ├── react 62 | │   │   │   └── interface.py 63 | │   │   ├── schemas.py 64 | │   │   ├── settings.py 65 | │   │   ├── style.py 66 | │   │   └── utils 67 | │   │   ├── interface.py 68 | │   │   └── voice.py 69 | │   ├── tests 70 | │   └── workflows 71 | │   ├── audio_image_to_video_with_sonic.json 72 | │   └── text_to_speech_with_zonos.json 73 | ├── cloud_setup 74 | │   ├── joyvasa 75 | │   │   └── setup_joyvasa.sh 76 | │   ├── pyproject.toml 77 | │   ├── sonic 78 | │   │   ├── install_sonic.sh 79 | │   │   └── pyproject.toml 80 | │   ├── uv.lock 81 | │   └── zonos 82 | │   └── pyproject.toml 83 | ├── github_assets 84 | │   ├── hpi-logo-white.svg 85 | │   └── napoleon_example.png 86 | ├── poetry.lock 87 | ├── pyproject.toml 88 | └── file_tree.txt 89 | ``` 90 | 91 | 92 | ## Tutorial: 93 | - Youtube tutorial: https://www.youtube.com/watch?v=MGmBf7OsFJk 94 | ## Installation 95 | 96 | Install the required packages by running the following commands: 97 | 98 | ```bash 99 | pip install poetry 100 | ``` 101 | 102 | ```bash 103 | poetry install 104 | ``` 105 | 106 | ### ComfyUI 107 | - Use Video Helper Suite v1.5.0 (can be selected in ComfyManager) 108 | 109 | ## Run the application 110 | 111 | If you use VSCode, you can run the application by clicking on the `Run and Debug` button and selecting `Python: Run and Debug` and then `Run Gradio UI`. 112 | 113 | If you don't use VSCode, you can run the application by running the following command: 114 | 115 | ```bash 116 | python aafactory/src/aafactory/create_gradio_ui.py 117 | ``` 118 | 119 | You will also need: 120 | - ElevenLabs API key 121 | - OpenAI API key 122 | - ComfyUI server URL 123 | 124 | For ComfyUI, the worflow is defined in the `workflows` folder. You need to make sure the nodes are installed. A more detailed guide will be available soon. 125 | 126 | ### Runpod Template for ComfyUI 127 | 128 | - [Template](https://runpod.io/console/deploy?template=laidmkkjli&ref=uw67f0zc) 129 | 130 | 131 | ### Current Tech Stack: 132 | 133 | - Gradio – Frontend 134 | 135 | - ComfyUI – Backend 136 | 137 | - OpenAI API – LLM 138 | 139 | - ElevenLabs – TTS 140 | 141 | - Flux – Text-to-Image (Avatar Generation) 142 | 143 | - Sonic – Audio-Driven Video Generation 144 | 145 | 146 | ## More Examples 147 | 148 | See our website for more examples: [AAFactory.xyz](https://aafactory.xyz/) 149 | 150 | 151 | ## Incoming Features 152 | 153 | - [ ] Add support for Hugging Face models (Text to Speech and Text to Text) 154 | - [x] Create documentation for ComfyUI cloud hosting 155 | - [ ] Improve ComfyUI cloud hosting setup 156 | - [x] Enable users to manage several avatars 157 | - [ ] Enable users to easily share avatar's setup with others 158 | - [ ] Add feature to let an avatar react to a Youtube video 159 | - [ ] Add microphone button for direct chat with Avatar 160 | 161 | 162 | ## Partners 163 | 164 | ![HPI Logo](https://github.com/Reekomer/aafactory/blob/main/github_assets/hpi-logo-white.svg?raw=true) 165 | -------------------------------------------------------------------------------- /aafactory/src/aafactory/style.py: -------------------------------------------------------------------------------- 1 | CSS = """ 2 | /* Main theme colors */ 3 | :root { 4 | --primary-color: #73111e; 5 | --secondary-color: #18A1FA; 6 | --accent-color: #06BEE1; 7 | --background-dark: #0F172A; 8 | --background-light: #1E293B; 9 | --text-light: #F8FAFC; 10 | --text-muted: #94A3B8; 11 | --border-color: rgba(45, 127, 249, 0.2); 12 | --shadow-color: rgba(45, 127, 249, 0.1); 13 | --gradient-bg: linear-gradient(135deg, var(--background-dark) 0%, var(--background-light) 100%); 14 | } 15 | 16 | /* Global styles */ 17 | .gradio-container { 18 | background: var(--gradient-bg) !important; 19 | color: var(--text-light) !important; 20 | font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important; 21 | } 22 | 23 | /* Headers */ 24 | h1, h2, h3 { 25 | color: var(--text-light) !important; 26 | font-weight: 600 !important; 27 | letter-spacing: -0.02em !important; 28 | margin-bottom: 1rem !important; 29 | } 30 | 31 | /* Buttons */ 32 | button, .button { 33 | background: var(--primary-color) !important; 34 | border: none !important; 35 | color: var(--text-light) !important; 36 | padding: 0.5rem 1rem !important; 37 | border-radius: 8px !important; 38 | font-weight: 500 !important; 39 | transition: all 0.2s ease !important; 40 | box-shadow: 0 2px 4px var(--shadow-color) !important; 41 | } 42 | 43 | button:hover, .button:hover { 44 | background: var(--secondary-color) !important; 45 | transform: translateY(-1px) !important; 46 | box-shadow: 0 4px 8px var(--shadow-color) !important; 47 | } 48 | 49 | /* Input fields */ 50 | input, textarea { 51 | background: var(--background-light) !important; 52 | border: 1px solid var(--border-color) !important; 53 | color: var(--text-light) !important; 54 | border-radius: 8px !important; 55 | padding: 0.75rem !important; 56 | transition: all 0.2s ease !important; 57 | } 58 | 59 | input:focus, textarea:focus { 60 | border-color: var(--primary-color) !important; 61 | box-shadow: 0 0 0 2px var(--shadow-color) !important; 62 | outline: none !important; 63 | } 64 | 65 | /* Labels */ 66 | label { 67 | color: var(--text-muted) !important; 68 | font-size: 0.875rem !important; 69 | font-weight: 500 !important; 70 | margin-bottom: 0.5rem !important; 71 | } 72 | 73 | /* Chat interface */ 74 | .chatbot { 75 | background: var(--background-light) !important; 76 | border: 1px solid var(--border-color) !important; 77 | border-radius: 12px !important; 78 | box-shadow: 0 4px 6px var(--shadow-color) !important; 79 | overflow: hidden !important; 80 | } 81 | 82 | .message { 83 | background: var(--background-dark) !important; 84 | border-radius: 8px !important; 85 | margin: 0.5rem !important; 86 | padding: 1rem !important; 87 | color: var(--text-light) !important; 88 | } 89 | 90 | /* Dropdowns and Selects */ 91 | select, .select { 92 | background: var(--background-light) !important; 93 | border: 1px solid var(--border-color) !important; 94 | border-radius: 8px !important; 95 | color: var(--text-light) !important; 96 | padding: 0.5rem !important; 97 | } 98 | 99 | /* Progress bars */ 100 | .progress-bar { 101 | background: var(--primary-color) !important; 102 | height: 4px !important; 103 | border-radius: 2px !important; 104 | } 105 | 106 | /* Scrollbar */ 107 | ::-webkit-scrollbar { 108 | width: 8px; 109 | height: 8px; 110 | } 111 | 112 | ::-webkit-scrollbar-track { 113 | background: var(--background-dark); 114 | border-radius: 4px; 115 | } 116 | 117 | ::-webkit-scrollbar-thumb { 118 | background: var(--primary-color); 119 | border-radius: 4px; 120 | } 121 | 122 | ::-webkit-scrollbar-thumb:hover { 123 | background: var(--secondary-color); 124 | } 125 | 126 | /* Containers and Cards */ 127 | .container, .card { 128 | background: var(--background-light) !important; 129 | border: 1px solid var(--border-color) !important; 130 | border-radius: 12px !important; 131 | padding: 1.5rem !important; 132 | margin: 1rem 0 !important; 133 | box-shadow: 0 4px 6px var(--shadow-color) !important; 134 | } 135 | 136 | /* Fix for Gradio specific elements */ 137 | .gr-box, .gr-form { 138 | border-radius: 12px !important; 139 | border: 1px solid var(--border-color) !important; 140 | background: var(--background-light) !important; 141 | } 142 | 143 | .gr-padded { 144 | padding: 1.5rem !important; 145 | } 146 | 147 | /* Responsive adjustments */ 148 | @media (max-width: 640px) { 149 | .container, .card { 150 | padding: 1rem !important; 151 | } 152 | 153 | button, .button { 154 | width: 100% !important; 155 | } 156 | } 157 | 158 | /* Audio and Video elements */ 159 | audio, video { 160 | border-radius: 8px !important; 161 | background: var(--background-dark) !important; 162 | margin: 0.5rem 0 !important; 163 | } 164 | 165 | /* File upload areas */ 166 | .upload-box { 167 | border: 2px dashed var(--border-color) !important; 168 | border-radius: 12px !important; 169 | background: var(--background-dark) !important; 170 | padding: 2rem !important; 171 | text-align: center !important; 172 | } 173 | 174 | .upload-box:hover { 175 | border-color: var(--primary-color) !important; 176 | } 177 | """ -------------------------------------------------------------------------------- /aafactory/src/aafactory/chat/interface.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from aafactory.comfyui.video import send_request_to_generate_video 3 | from aafactory.configuration import AVATAR_PAGE_SETTINGS_TABLE_NAME, DB_PATH, DEFAULT_AVATAR_IMAGE_PATH, VOICE_MODELS 4 | from aafactory.database.manage_db import AVATAR_TABLE_NAME 5 | from aafactory.fetcher.fetching import send_request_to_open_ai 6 | from aafactory.utils.voice import send_request_to_elevenlabs, send_request_to_zonos 7 | import gradio as gr 8 | from PIL import Image 9 | from string import Template 10 | 11 | from tinydb import TinyDB 12 | 13 | CHAT_HISTORY = [] 14 | CURRENT_AVATAR = None # Global variable to track current avatar 15 | SYSTEM_PROMPT = Template(""" 16 | Your name is $name. 17 | Personality: 18 | $personality 19 | 20 | Background Knowledge: 21 | $background_knowledge 22 | """) 23 | 24 | def create_chat_interface(): 25 | with gr.Blocks() as chat: 26 | with gr.Row(): 27 | with gr.Column(): 28 | name = gr.Textbox(label="Name", visible=False) 29 | personality = gr.Textbox(label="Personality", visible=False) 30 | background_knowledge = gr.Textbox(label="Background Knowledge", visible=False) 31 | voice_model = gr.Dropdown(label="Voice Model", choices=VOICE_MODELS, visible=False) 32 | voice_id = gr.Textbox(label="Voice ID", visible=False) 33 | voice_language = gr.Textbox(label="Voice Language", visible=False) 34 | voice_recording_path = gr.Textbox(label="Voice Recording", visible=False) 35 | audio_transcript = gr.Textbox(label="Audio Transcript", visible=False) 36 | avatar_image = gr.Textbox(value=DEFAULT_AVATAR_IMAGE_PATH, visible=False) 37 | avatar_animation = gr.Video(value=DEFAULT_AVATAR_IMAGE_PATH, autoplay=True) 38 | with gr.Column(): 39 | chatbot = gr.Chatbot(placeholder="Your Personal Avatar
Ask Me Anything") 40 | chatbot.like(vote, None, None) 41 | msg = gr.Textbox(label="Message") 42 | submit_btn = gr.Button("Send") 43 | submit_btn.click( 44 | fn=send_request_to_llm, 45 | inputs=[avatar_image, msg, name, personality, background_knowledge, voice_model, voice_id, voice_recording_path, audio_transcript, voice_language], 46 | outputs=[msg, chatbot, avatar_animation] 47 | ) 48 | # Add refresh event 49 | chat.load( 50 | fn=_load_avatar_infos_for_chat, 51 | outputs=[name, personality, background_knowledge, avatar_animation, voice_model, voice_id, voice_recording_path, audio_transcript, voice_language, avatar_image] 52 | ) 53 | return chat 54 | 55 | 56 | def _load_avatar_infos_for_chat(): 57 | db = TinyDB(DB_PATH) 58 | table = db.table(AVATAR_TABLE_NAME) 59 | avatar_page_settings_table = db.table(AVATAR_PAGE_SETTINGS_TABLE_NAME) 60 | avatar_page_settings = avatar_page_settings_table.get(doc_id=1) # Changed from 0 to 1 since TinyDB starts at 1 61 | avatar_name = avatar_page_settings.get("avatar_name") 62 | avatar_info = table.get(lambda x: x.get("name") == avatar_name) 63 | if avatar_info: 64 | return ( 65 | avatar_info.get("name", ""), 66 | avatar_info.get("personality", ""), 67 | avatar_info.get("background_knowledge", ""), 68 | avatar_info.get("avatar_image_path", "") , # for video 69 | avatar_info.get("voice_model", "elevenlabs"), 70 | avatar_info.get("voice_id", ""), 71 | avatar_info.get("voice_recording_path", ""), 72 | avatar_info.get("audio_transcript", ""), 73 | avatar_info.get("voice_language", ""), 74 | avatar_info.get("avatar_image_path", "") # for image path 75 | ) 76 | return "", "", "", DEFAULT_AVATAR_IMAGE_PATH, "elevenlabs", "", "", "", "", "" 77 | 78 | async def send_request_to_llm(avatar_image_path: str, user_prompt: str, name: str, personality: str, background_knowledge: str, voice_model: str, voice_id: str, voice_recording_path: str, audio_transcript: str, voice_language: str) -> tuple[str, list, str]: 79 | global CURRENT_AVATAR 80 | 81 | # Clear history if avatar changed 82 | if CURRENT_AVATAR != name: 83 | CHAT_HISTORY.clear() 84 | CURRENT_AVATAR = name 85 | 86 | user_message = user_prompt 87 | avatar_image_path = Path(avatar_image_path) 88 | if len(CHAT_HISTORY) > 0: 89 | user_message = CHAT_HISTORY[-1][0] + user_prompt 90 | messages = [ 91 | {"role": "system", "content": SYSTEM_PROMPT.substitute(name=name, personality=personality, background_knowledge=background_knowledge)}, 92 | {"role": "user", "content": user_message}, 93 | ] 94 | text_response = await send_request_to_open_ai(messages) 95 | if voice_model == "elevenlabs": 96 | audio_response = await send_request_to_elevenlabs(text_response, voice_id) 97 | elif voice_model == "zonos": 98 | audio_response = await send_request_to_zonos(text_response, voice_language, voice_recording_path, audio_transcript) 99 | video_response = await send_request_to_generate_video(avatar_image_path, audio_response) 100 | # Return empty message (to clear input), updated history, and animation path 101 | CHAT_HISTORY.append([user_prompt, text_response]) 102 | return "", CHAT_HISTORY, video_response 103 | 104 | def vote(data: gr.LikeData): 105 | if data.liked: 106 | print("You upvoted this response: " + data.value["value"]) 107 | else: 108 | print("You downvoted this response: " + data.value["value"]) 109 | 110 | 111 | def update_avatar_image(image: Image.Image): 112 | global AVATAR_IMAGE 113 | AVATAR_IMAGE = image 114 | return image 115 | -------------------------------------------------------------------------------- /aafactory/src/aafactory/database/manage_db.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import numpy as np 3 | import gradio as gr 4 | import soundfile as sf 5 | from aafactory.schemas import Settings 6 | from loguru import logger 7 | from tinydb import TinyDB 8 | from PIL import Image 9 | from pathlib import Path 10 | from aafactory.configuration import AVATAR_PAGE_SETTINGS_TABLE_NAME, AVATAR_TABLE_NAME, DB_PATH, AVATAR_IMAGES_PATH, DEFAULT_VOICE_RECORDING_PATH, SETTINGS_TABLE_NAME, AVATAR_VOICE_RECORDINGS_PATH 11 | 12 | 13 | def update_avatar_infos(name: str, personality: str, background_knowledge: str, avatar_image: Image.Image, voice_model: str, voice_id: str, voice_recording: bytes, audio_transcript: str, voice_language: str) -> None: 14 | """ 15 | Update the avatar infos in the database. 16 | """ 17 | db = TinyDB(DB_PATH) 18 | table = db.table(AVATAR_TABLE_NAME) 19 | # Check if avatar with same name already exists 20 | existing_avatar = table.get(lambda x: x.get('name') == name) 21 | if existing_avatar: 22 | logger.warning(f"Avatar with name '{name}' already exists") 23 | gr.Warning(f"Avatar with name '{name}' already exists. Please choose a different name.") 24 | return 25 | avatar_image_path = _save_avatar_image(avatar_image, AVATAR_IMAGES_PATH) 26 | if voice_model == "elevenlabs": 27 | avatar_infos = {"name": name, "personality": personality, "background_knowledge": background_knowledge, "avatar_image_path": avatar_image_path, "voice_model": voice_model, "voice_id": voice_id} 28 | elif voice_model == "zonos": 29 | voice_recording_file_path = _save_voice_recording(voice_recording) 30 | avatar_infos = {"name": name, "personality": personality, "background_knowledge": background_knowledge, "avatar_image_path": avatar_image_path, "voice_model": voice_model, "voice_language": voice_language, "voice_recording_path": voice_recording_file_path, "audio_transcript": audio_transcript} 31 | table.insert(avatar_infos) 32 | logger.success(f"Avatar infos updated: {avatar_infos}") 33 | gr.Info("Avatar infos updated",) 34 | 35 | 36 | def _save_voice_recording(voice_recording: tuple[int, np.ndarray]) -> str: 37 | """ 38 | Save the voice recording to the voice recording path. 39 | """ 40 | voice_recording_file_path = AVATAR_VOICE_RECORDINGS_PATH / f"{uuid.uuid4()}.wav" 41 | sf.write(voice_recording_file_path, voice_recording[1], voice_recording[0]) 42 | return voice_recording_file_path.as_posix() 43 | 44 | 45 | def _save_avatar_image(avatar_image: Image.Image, avatar_image_folder: Path) -> str: 46 | """ 47 | Save the avatar image to the avatar image path. 48 | """ 49 | avatar_image_folder.mkdir(parents=True, exist_ok=True) 50 | avatar_image_path = avatar_image_folder / f"{uuid.uuid4()}.png" 51 | avatar_image.save(avatar_image_path) 52 | return avatar_image_path.as_posix() 53 | 54 | def load_avatar_infos() -> tuple[str, str, str, Image.Image, str, str, str, str]: 55 | """ 56 | Load the avatar infos from the database. 57 | """ 58 | db = TinyDB(DB_PATH) 59 | avatar_table = db.table(AVATAR_TABLE_NAME) 60 | avatar_page_settings_table = db.table(AVATAR_PAGE_SETTINGS_TABLE_NAME) 61 | avatar_page_settings = avatar_page_settings_table.get(doc_id=1) # Changed from 0 to 1 since TinyDB starts at 1 62 | avatar_name = avatar_page_settings.get("avatar_name") 63 | avatar_info = avatar_table.get(lambda x: x.get("name") == avatar_name) 64 | if avatar_page_settings.get("is_creating_new_avatar"): 65 | return "", "", "", None, "", "", None, "", "" 66 | if avatar_info: 67 | gr.Info(f"Current loaded avatar is {avatar_name}") 68 | return ( 69 | avatar_info.get("name", ""), 70 | avatar_info.get("personality", ""), 71 | avatar_info.get("background_knowledge", ""), 72 | avatar_info.get("avatar_image_path", ""), 73 | avatar_info.get("voice_model", ""), 74 | avatar_info.get("voice_id", ""), 75 | avatar_info.get("voice_recording_path", None), 76 | avatar_info.get("audio_transcript", ""), 77 | avatar_info.get("voice_language", "") 78 | ) 79 | return "", "", "", None, "", "", None, "", "" 80 | 81 | def load_selected_avatar_infos(avatar_name: str) -> tuple[str, str, str, Image.Image, str, str, str, str]: 82 | """ 83 | Load the avatar infos from the database. 84 | """ 85 | db = TinyDB(DB_PATH) 86 | avatar_table = db.table(AVATAR_TABLE_NAME) 87 | avatar_info = avatar_table.get(lambda x: x.get("name") == avatar_name) # Changed from 0 to 1 since TinyDB starts at 1 88 | if avatar_info: 89 | save_avatar_page_settings(False, avatar_name) 90 | return ( 91 | avatar_info.get("name", ""), 92 | avatar_info.get("personality", ""), 93 | avatar_info.get("background_knowledge", ""), 94 | avatar_info.get("avatar_image_path", ""), 95 | avatar_info.get("voice_model", ""), 96 | avatar_info.get("voice_id", ""), 97 | avatar_info.get("voice_recording_path", None), 98 | avatar_info.get("audio_transcript", ""), 99 | avatar_info.get("voice_language", "") 100 | ) 101 | return "", "", "", None, "", "", None, "", "" 102 | 103 | 104 | def get_settings() -> Settings: 105 | """ 106 | Get the settings from the database. 107 | """ 108 | db = TinyDB(DB_PATH) 109 | table = db.table(SETTINGS_TABLE_NAME) 110 | return Settings(**table.get(doc_id=1)) 111 | 112 | def save_avatar_page_settings(is_creating_new_avatar: bool, avatar_name: str | None = None) -> None: 113 | """ 114 | Save the avatar page settings in the database. 115 | """ 116 | db = TinyDB(DB_PATH) 117 | table = db.table(AVATAR_PAGE_SETTINGS_TABLE_NAME) 118 | if AVATAR_PAGE_SETTINGS_TABLE_NAME in db.tables(): 119 | db.drop_table(AVATAR_PAGE_SETTINGS_TABLE_NAME) 120 | table.insert({"is_creating_new_avatar": is_creating_new_avatar, "avatar_name": avatar_name}) 121 | logger.success(f"Avatar page settings updated: {is_creating_new_avatar} {avatar_name}") 122 | 123 | 124 | def get_available_avatars() -> list[str]: 125 | """ 126 | Get the available avatars from the database. 127 | """ 128 | db = TinyDB(DB_PATH) 129 | table = db.table(AVATAR_TABLE_NAME) 130 | return [avatar.get("name") for avatar in table.all()] -------------------------------------------------------------------------------- /aafactory/src/aafactory/avatar/interface.py: -------------------------------------------------------------------------------- 1 | from aafactory.database.manage_db import get_available_avatars, load_avatar_infos, load_selected_avatar_infos, save_avatar_page_settings, update_avatar_infos 2 | import gradio as gr 3 | from aafactory.configuration import VOICE_LANGUAGES, VOICE_MODELS 4 | 5 | def create_avatar_setup_interface() -> None: 6 | """ 7 | Create the avatar setup interface. 8 | """ 9 | avatar_setup = gr.Row() 10 | available_avatars = gr.Dropdown(choices=[], visible=False) 11 | avatar_infos = gr.Accordion(visible=False) 12 | voice_settings = gr.Accordion(visible=False) 13 | submit_btn = gr.Button("Save Avatar Infos", visible=False) 14 | with gr.Blocks() as define_avatar: 15 | with avatar_setup: 16 | with gr.Column() as create_avatar: 17 | create_avatar_btn = gr.Button("Create New Avatar") 18 | create_avatar_btn.click( 19 | fn=_create_avatar_infos, 20 | inputs=[], 21 | outputs=[avatar_infos, voice_settings, submit_btn] 22 | ) 23 | with gr.Column() as load_avatar: 24 | load_avatar_btn = gr.Button("Load Existing Avatar") 25 | load_avatar_btn.click( 26 | fn=_load_available_avatars, 27 | inputs=[], 28 | outputs=[available_avatars, avatar_infos, voice_settings, submit_btn] 29 | ) 30 | 31 | with avatar_infos: 32 | gr.Markdown("### Name") 33 | name = gr.Textbox(show_label=False, info="Enter the name of your avatar") 34 | gr.Markdown("### Personality") 35 | personality = gr.TextArea(show_label=False, info="Enter the personality of your avatar") 36 | gr.Markdown("### Background Knowledge") 37 | background_knowledge = gr.TextArea(show_label=False, info="Enter the background knowledge of your avatar") 38 | gr.Markdown("### Avatar Image") 39 | avatar_image = gr.Image(sources=["upload"], type="pil", show_label=False, show_download_button=False, show_fullscreen_button=False) 40 | with voice_settings: 41 | gr.Markdown("### Voice Model") 42 | voice_model = gr.Dropdown(choices=VOICE_MODELS, value="elevenlabs", interactive=True, info="Select the voice model you want to use") 43 | voice_id = gr.Textbox(show_label=False, visible=False, interactive=True, info="Enter the voice id you want to use") 44 | voice_recording = gr.Audio(show_label=False, visible=False, interactive=True, label="Upload a voice sample") 45 | audio_transcript = gr.TextArea(show_label=False, visible=False, interactive=True, info="Enter the audio transcript you want to use") 46 | voice_language = gr.Dropdown(choices=VOICE_LANGUAGES, value="en-us", visible=False, interactive=True, info="Select the voice language you want to use") 47 | available_avatars.change( 48 | fn=load_selected_avatar_infos, 49 | inputs=[available_avatars], 50 | outputs=[name, personality, background_knowledge, avatar_image, voice_model, voice_id, voice_recording, audio_transcript, voice_language] 51 | ) 52 | submit_btn.click( 53 | fn=update_avatar_infos, 54 | inputs=[name, personality, background_knowledge, avatar_image, voice_model, voice_id, voice_recording, audio_transcript, voice_language] 55 | ) 56 | voice_model.change( 57 | fn=_adapt_ui_to_voice_model, 58 | inputs=[voice_model], 59 | outputs=[voice_id, voice_recording, audio_transcript, voice_language] 60 | ) 61 | # Add refresh event 62 | define_avatar.load( 63 | fn=load_avatar_infos, 64 | inputs=[], 65 | outputs=[name, personality, background_knowledge, avatar_image, voice_model, voice_id, voice_recording, audio_transcript, voice_language] 66 | ) 67 | 68 | def _load_available_avatars() -> tuple[gr.Dropdown, gr.Accordion, gr.Accordion, gr.Button]: 69 | """ 70 | Load the available avatars. 71 | """ 72 | save_avatar_page_settings(False) 73 | available_avatars = get_available_avatars() 74 | return gr.Dropdown(choices=available_avatars, interactive=True, visible=True), gr.Accordion("Avatar Infos", visible=True), gr.Accordion("Voice Settings", visible=True), gr.Button("Save Avatar Infos", visible=True) 75 | 76 | 77 | def _adapt_ui_to_voice_model(voice_model: str) -> tuple[gr.Textbox, gr.Audio, gr.TextArea]: 78 | """ 79 | Adapt the UI to the voice model. 80 | """ 81 | if voice_model == "elevenlabs": 82 | return gr.Textbox(show_label=False, visible=True, interactive=True, info="Enter the voice id you want to use"), gr.Audio(show_label=False, visible=False, interactive=True, label="Upload a voice sample"), gr.TextArea(show_label=False, visible=False, interactive=True, info="Enter the audio transcript you want to use"), gr.Dropdown(choices=VOICE_LANGUAGES, value="en-us", visible=False, interactive=True, info="Select the voice language you want to use") 83 | if voice_model == "zonos": 84 | gr.Markdown("### Clone a voice") 85 | return gr.Textbox(show_label=False, visible=False, interactive=True, info="Enter the voice id you want to use"), gr.Audio(show_label=False, visible=True, interactive=True, label="Upload a voice sample"), gr.TextArea(show_label=False, visible=True, interactive=True, info="Enter the audio transcript you want to use"), gr.Dropdown(choices=VOICE_LANGUAGES, value="en-us", visible=True, interactive=True, info="Select the voice language you want to use") 86 | if voice_model == "": 87 | return gr.Textbox(show_label=False, visible=True, interactive=True, info="Enter the voice id you want to use"), gr.Audio(show_label=False, visible=False, interactive=True, label="Upload a voice sample"), gr.TextArea(show_label=False, visible=False, interactive=True, info="Enter the audio transcript you want to use"), gr.Dropdown(choices=VOICE_LANGUAGES, value="en-us", visible=False, interactive=True, info="Select the voice language you want to use") 88 | raise ValueError(f"Voice model {voice_model} not supported") 89 | 90 | def _create_avatar_infos() -> tuple[gr.Accordion, gr.Accordion, gr.Button]: 91 | """ 92 | Create the avatar infos. 93 | """ 94 | save_avatar_page_settings(True) 95 | gr.Info("Creating new avatar. Please reload the page to empty the fields.",) 96 | return gr.Accordion("Avatar Infos", visible=True), gr.Accordion("Voice Settings", visible=True), gr.Button("Save Avatar Infos", visible=True) 97 | 98 | -------------------------------------------------------------------------------- /aafactory/src/aafactory/comfyui/video.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | import time 4 | import uuid 5 | from aafactory.configuration import GENERATED_VIDEO_PATH, WORKFLOW_FOLDER 6 | from aafactory.schemas import Settings 7 | from loguru import logger 8 | from pydantic import BaseModel 9 | import requests 10 | import soundfile as sf 11 | from aafactory.database.manage_db import get_settings 12 | 13 | 14 | class QueueHistory(BaseModel): 15 | prompt_id: str 16 | response: dict 17 | 18 | 19 | async def send_request_to_generate_video(avatar_image_path: Path, audio_file_path: Path) -> Path: 20 | """ 21 | Send a request to the server to generate a video. 22 | """ 23 | settings = get_settings() 24 | avatar_image_path = Path(avatar_image_path) 25 | upload_files_to_comfyui_server([avatar_image_path, audio_file_path]) 26 | workflow = _create_workflow(avatar_image_path, audio_file_path) 27 | queue_history = await queue_task(workflow, settings) 28 | video_url = _get_video_url(settings, queue_history) 29 | output_path = _save_video_to_file(video_url) 30 | return output_path 31 | 32 | 33 | def upload_files_to_comfyui_server(files: list[Path]) -> None: 34 | """ 35 | Upload files to the server. 36 | """ 37 | settings = get_settings() 38 | comfy_server_url = settings.comfy_server_url 39 | logger.info(f"Uploading Files to ComfyUI Server at {comfy_server_url} ...") 40 | for file in files: 41 | with open(file, "rb") as f: 42 | to_upload_files = { 43 | 'image': (file.name, f, 'image/' + file.suffix[1:]) 44 | } 45 | response =requests.post(f"{comfy_server_url}/upload/image", files=to_upload_files) 46 | if response.status_code == 200: 47 | logger.success(f"Uploaded {file.name}") 48 | else: 49 | logger.error(f"Failed to upload {file.name}. Status code: {response.status_code}") 50 | 51 | 52 | def _create_workflow(avatar_image_path: Path, audio_file_path: Path) -> dict: 53 | """ 54 | Create a workflow for the video generation. 55 | """ 56 | with open(Path(WORKFLOW_FOLDER, "audio_image_to_video_with_sonic.json"), "r") as f: 57 | workflow = json.load(f) 58 | workflow["6"]["inputs"]["duration"] = _get_audio_file_duration(audio_file_path) 59 | workflow["7"]["inputs"]["image"] = avatar_image_path.name 60 | workflow["9"]["inputs"]["audio"] = audio_file_path.name 61 | return {"prompt": workflow} 62 | 63 | 64 | def _get_audio_file_duration(audio_file_path: Path) -> int: 65 | """ 66 | Get the duration of the audio file. 67 | """ 68 | info = sf.info(str(audio_file_path)) 69 | return round(info.duration, 2) + 2 70 | 71 | async def queue_task(workflow: dict, settings: Settings) -> QueueHistory: 72 | """ 73 | Queue a task to the server. 74 | """ 75 | response1 = _queue_prompt(workflow, settings) 76 | if response1 is None: 77 | logger.error("Failed to queue the prompt.") 78 | return 79 | 80 | prompt_id = response1['prompt_id'] 81 | logger.info(f'Prompt ID: {prompt_id}') 82 | logger.info('-' * 20) 83 | while True: 84 | time.sleep(5) 85 | queue_response = _get_queue(settings.comfy_server_url) 86 | if queue_response is None: 87 | continue 88 | 89 | queue_pending = queue_response.get('queue_pending', []) 90 | queue_running = queue_response.get('queue_running', []) 91 | 92 | # Check position in queue 93 | for position, item in enumerate(queue_pending): 94 | if item[1] == prompt_id: 95 | logger.info(f'Queue running: {len(queue_running)}, Queue pending: {len(queue_pending)}, Workflow is in position {position + 1} in the queue.') 96 | 97 | # Check if the prompt is currently running 98 | for item in queue_running: 99 | if item[1] == prompt_id: 100 | logger.info(f'Queue running: {len(queue_running)}, Queue pending: {len(queue_pending)}, Workflow is currently running.') 101 | break 102 | 103 | if not any(prompt_id in item for item in queue_pending + queue_running): 104 | break 105 | 106 | response = _get_history(settings.comfy_server_url, prompt_id) 107 | return QueueHistory(prompt_id=prompt_id, response=response) 108 | 109 | 110 | def _queue_prompt(prompt: str, settings: Settings): 111 | data = json.dumps(prompt).encode('utf-8') 112 | prompt_url = f"{settings.comfy_server_url}/prompt" 113 | try: 114 | r = requests.post(prompt_url, data=data, headers={"Content-Type": "application/json"}) 115 | r.raise_for_status() 116 | return r.json() 117 | except requests.exceptions.RequestException as ex: 118 | logger.error(f'POST {prompt_url} failed: {ex}') 119 | return None 120 | 121 | 122 | def _get_queue(url): 123 | queue_url = f"{url}/queue" 124 | try: 125 | r = requests.get(queue_url) 126 | r.raise_for_status() 127 | return r.json() 128 | except requests.exceptions.RequestException as ex: 129 | print(f'GET {queue_url} failed: {ex}') 130 | return None 131 | 132 | 133 | def _get_history(url, prompt_id): 134 | history_url = f"{url}/history/{prompt_id}" 135 | try: 136 | r = requests.get(history_url) 137 | r.raise_for_status() 138 | return r.json() 139 | except requests.exceptions.RequestException as ex: 140 | print(f'GET {history_url} failed: {ex}') 141 | return None 142 | 143 | 144 | def _get_video_url(settings: Settings, queue_history: QueueHistory) -> str: 145 | """ 146 | Get the video URL from the history response. 147 | """ 148 | output_info = queue_history.response.get(queue_history.prompt_id, {}).get('outputs', {}).get('8', {}).get('gifs', [{}])[0] 149 | filename = output_info.get('filename', 'unknown.png') 150 | output_url = f"{settings.comfy_server_url}/api/viewvideo?filename={filename}" 151 | logger.success(f"Output URL: {output_url}") 152 | return output_url 153 | 154 | def _save_video_to_file(video_url: str) -> Path: 155 | """ 156 | Save the video to a file. 157 | """ 158 | output_path = GENERATED_VIDEO_PATH / f"{uuid.uuid4().hex}.mp4" 159 | output_path.parent.mkdir(parents=True, exist_ok=True) 160 | response = requests.get(video_url) 161 | with open(output_path, "wb") as f: 162 | f.write(response.content) 163 | logger.success(f"Video saved to {output_path}") 164 | return output_path -------------------------------------------------------------------------------- /github_assets/hpi-logo-white.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /cloud_setup/runpod_templates/comfy_ui/2025-04-26_11-26-04_snapshot.json: -------------------------------------------------------------------------------- 1 | { 2 | "comfyui": "75c1c757d90ca891eff823893248ef8b51d31d01", 3 | "git_custom_nodes": { 4 | "https://github.com/ShmuelRonen/ComfyUI-LatentSyncWrapper": { 5 | "hash": "920c15ea8803f1b9cfdd504630338a21330c0eb5", 6 | "disabled": false 7 | }, 8 | "https://github.com/Comfy-Org/ComfyUI-Manager.git": { 9 | "hash": "50fc1389b0184d93765aefbbc4186fcdac96bdf9", 10 | "disabled": false 11 | }, 12 | "https://github.com/kijai/ComfyUI-WanVideoWrapper.git": { 13 | "hash": "f2bc29b931983e279d25452d284b3888c8c81346", 14 | "disabled": false 15 | }, 16 | "https://github.com/smthemex/ComfyUI_Sonic": { 17 | "hash": "6595df02e72406fe23baebc12ac2088832d7ff9b", 18 | "disabled": false 19 | }, 20 | "https://github.com/kijai/ComfyUI-KJNodes": { 21 | "hash": "b7e5b6f1e2b7c79b3f1e4b4bfe5e1687715803ab", 22 | "disabled": false 23 | } 24 | }, 25 | "cnr_custom_nodes": { 26 | "comfyui-depthanythingv2": "1.0.0", 27 | "comfyui-frame-interpolation": "1.0.6", 28 | "comfyui-impact-pack": "8.8.0", 29 | "comfyui-videohelpersuite": "1.3.1", 30 | "comfyui_essentials": "1.1.0", 31 | "rgthree-comfy": "1.0.0" 32 | }, 33 | "file_custom_nodes": [ 34 | { 35 | "filename": "websocket_image_save.py", 36 | "disabled": false 37 | } 38 | ], 39 | "pips": { 40 | "accelerate==1.6.0": "", 41 | "aiohappyeyeballs==2.6.1": "", 42 | "aiohttp==3.11.16": "", 43 | "aiosignal==1.3.1": "", 44 | "antlr4-python3-runtime==4.9.3": "", 45 | "anyio==4.2.0": "", 46 | "argon2-cffi==23.1.0": "", 47 | "argon2-cffi-bindings==21.2.0": "", 48 | "arrow==1.3.0": "", 49 | "asttokens==2.4.1": "", 50 | "async-lru==2.0.4": "", 51 | "async-timeout==4.0.3": "", 52 | "attrs==23.2.0": "", 53 | "audioread==3.0.1": "", 54 | "av==14.3.0": "", 55 | "Babel==2.14.0": "", 56 | "bash_kernel==0.9.3": "", 57 | "beautifulsoup4==4.12.3": "", 58 | "bleach==6.1.0": "", 59 | "build==1.2.1": "", 60 | "CacheControl==0.14.0": "", 61 | "certifi==2022.12.7": "", 62 | "cffi==1.17.0": "", 63 | "chardet==5.2.0": "", 64 | "charset-normalizer==2.1.1": "", 65 | "cleo==2.1.0": "", 66 | "click==8.1.8": "", 67 | "cm-jupyter-eg-kernel-wlm==4.0.1": "", 68 | "cmake==3.25.0": "", 69 | "color-matcher==0.6.0": "", 70 | "colorama==0.4.6": "", 71 | "comfyui_frontend_package==1.14.5": "", 72 | "comm==0.2.2": "", 73 | "contourpy==1.3.2": "", 74 | "crashtest==0.4.1": "", 75 | "cryptography==43.0.0": "", 76 | "csvw==3.5.1": "", 77 | "cycler==0.12.1": "", 78 | "ddt==1.7.2": "", 79 | "debugpy==1.8.5": "", 80 | "decorator==5.1.1": "", 81 | "defusedxml==0.7.1": "", 82 | "Deprecated==1.2.18": "", 83 | "diffusers==0.32.2": "", 84 | "distlib==0.3.8": "", 85 | "dlinfo==2.0.0": "", 86 | "docutils==0.21.2": "", 87 | "dulwich==0.21.7": "", 88 | "einops==0.7.0": "", 89 | "entrypoints==0.4": "", 90 | "exceptiongroup==1.2.2": "", 91 | "executing==2.0.1": "", 92 | "fastjsonschema==2.19.1": "", 93 | "filelock==3.15.4": "", 94 | "fonttools==4.57.0": "", 95 | "fqdn==1.5.1": "", 96 | "frozenlist==1.4.1": "", 97 | "fsspec==2024.2.0": "", 98 | "ftfy==6.3.1": "", 99 | "gdown==5.1.0": "", 100 | "gitdb==4.0.11": "", 101 | "GitPython==3.1.41": "", 102 | "h11==0.14.0": "", 103 | "httpcore==1.0.2": "", 104 | "httpx==0.26.0": "", 105 | "huggingface-hub==0.24.5": "", 106 | "idna==3.4": "", 107 | "imageio==2.37.0": "", 108 | "imageio-ffmpeg==0.6.0": "", 109 | "importlib_metadata==8.2.0": "", 110 | "installer==0.7.0": "", 111 | "ipykernel==6.29.5": "", 112 | "ipython==8.18.1": "", 113 | "ipython-genutils==0.2.0": "", 114 | "isodate==0.7.2": "", 115 | "isoduration==20.11.0": "", 116 | "jaraco.classes==3.4.0": "", 117 | "jedi==0.19.1": "", 118 | "jeepney==0.8.0": "", 119 | "Jinja2==3.1.2": "", 120 | "joblib==1.4.2": "", 121 | "json5==0.9.14": "", 122 | "jsonpointer==2.4": "", 123 | "jsonschema==4.21.1": "", 124 | "jsonschema-specifications==2023.12.1": "", 125 | "jupyter-events==0.12.0": "", 126 | "jupyter-lsp==2.2.5": "", 127 | "jupyter_client==8.6.2": "", 128 | "jupyter_core==5.7.2": "", 129 | "jupyter_server==2.15.0": "", 130 | "jupyter_server_terminals==0.5.3": "", 131 | "jupyterlab==4.1.0": "", 132 | "jupyterlab_pygments==0.3.0": "", 133 | "jupyterlab_server==2.25.2": "", 134 | "kanjize==1.6.0": "", 135 | "keyring==24.3.1": "", 136 | "kiwisolver==1.4.8": "", 137 | "kornia==0.8.0": "", 138 | "kornia_rs==0.1.8": "", 139 | "language-tags==1.2.0": "", 140 | "lazy_loader==0.4": "", 141 | "librosa==0.11.0": "", 142 | "lit==15.0.7": "", 143 | "llvmlite==0.44.0": "", 144 | "lxml==5.1.0": "", 145 | "markdown-it-py==3.0.0": "", 146 | "MarkupSafe==2.1.3": "", 147 | "matplotlib==3.10.1": "", 148 | "matplotlib-inline==0.1.7": "", 149 | "matrix-client==0.4.0": "", 150 | "mdurl==0.1.2": "", 151 | "mistune==3.0.2": "", 152 | "more-itertools==10.4.0": "", 153 | "mpmath==1.3.0": "", 154 | "msgpack==1.0.8": "", 155 | "mss==10.0.0": "", 156 | "multidict==6.0.5": "", 157 | "nbclassic==1.0.0": "", 158 | "nbclient==0.9.0": "", 159 | "nbconvert==7.15.0": "", 160 | "nbformat==5.9.2": "", 161 | "nest-asyncio==1.6.0": "", 162 | "networkx==3.2.1": "", 163 | "notebook_shim==0.2.4": "", 164 | "numba==0.61.2": "", 165 | "numpy==1.26.3": "", 166 | "nvidia-cublas-cu12==12.1.3.1": "", 167 | "nvidia-cuda-cupti-cu12==12.1.105": "", 168 | "nvidia-cuda-nvrtc-cu12==12.1.105": "", 169 | "nvidia-cuda-runtime-cu12==12.1.105": "", 170 | "nvidia-cudnn-cu12==9.1.0.70": "", 171 | "nvidia-cufft-cu12==11.0.2.54": "", 172 | "nvidia-curand-cu12==10.3.2.106": "", 173 | "nvidia-cusolver-cu12==11.4.5.107": "", 174 | "nvidia-cusparse-cu12==12.1.0.106": "", 175 | "nvidia-nccl-cu12==2.20.5": "", 176 | "nvidia-nvjitlink-cu12==12.8.93": "", 177 | "nvidia-nvtx-cu12==12.1.105": "", 178 | "omegaconf==2.3.0": "", 179 | "opencv-python==4.11.0.86": "", 180 | "overrides==7.7.0": "", 181 | "packaging==25.0": "", 182 | "pandocfilters==1.5.1": "", 183 | "parso==0.8.4": "", 184 | "pexpect==4.9.0": "", 185 | "phonemizer==3.3.0": "", 186 | "piexif==1.1.3": "", 187 | "pillow==11.2.1": "", 188 | "pkginfo==1.11.1": "", 189 | "platformdirs==4.2.2": "", 190 | "poetry-core==1.9.0": "", 191 | "pooch==1.8.2": "", 192 | "prometheus-client==0.19.0": "", 193 | "prompt-toolkit==3.0.43": "", 194 | "propcache==0.3.1": "", 195 | "protobuf==5.27.3": "", 196 | "psutil==5.9.8": "", 197 | "ptyprocess==0.7.0": "", 198 | "pure_eval==0.2.3": "", 199 | "py-espeak-ng==0.1.8": "", 200 | "pycparser==2.21": "", 201 | "pycryptodome==3.20.0": "", 202 | "PyGithub==2.6.1": "", 203 | "Pygments==2.17.2": "", 204 | "PyJWT==2.10.1": "", 205 | "PyNaCl==1.5.0": "", 206 | "pyparsing==3.2.3": "", 207 | "pyproject_hooks==1.2.0": "", 208 | "PySocks==1.7.1": "", 209 | "python-dateutil==2.8.2": "", 210 | "python-json-logger==3.3.0": "", 211 | "PyYAML==6.0.1": "", 212 | "pyzmq==26.2.0": "", 213 | "RapidFuzz==3.13.0": "", 214 | "rdflib==7.1.4": "", 215 | "referencing==0.36.2": "", 216 | "regex==2024.11.6": "", 217 | "requests==2.32.3": "", 218 | "rfc3339-validator==0.1.4": "", 219 | "rfc3986==1.5.0": "", 220 | "rfc3986-validator==0.1.1": "", 221 | "rich==14.0.0": "", 222 | "rpds-py==0.24.0": "", 223 | "safetensors==0.5.3": "", 224 | "sageattention==1.0.6": "", 225 | "scikit-image==0.25.2": "", 226 | "scikit-learn==1.6.1": "", 227 | "scipy==1.12.0": "", 228 | "SecretStorage==3.3.3": "", 229 | "segment-anything==1.0": "", 230 | "segments==2.3.0": "", 231 | "Send2Trash==1.8.3": "", 232 | "sentencepiece==0.2.0": "", 233 | "shellingham==1.5.4": "", 234 | "six==1.17.0": "", 235 | "smmap==5.0.2": "", 236 | "sniffio==1.3.1": "", 237 | "soundfile==0.13.1": "", 238 | "soupsieve==2.6": "", 239 | "soxr==0.5.0.post1": "", 240 | "spandrel==0.4.1": "", 241 | "stack-data==0.6.3": "", 242 | "SudachiDict-full==20250129": "", 243 | "SudachiPy==0.6.10": "", 244 | "sympy==1.13.3": "", 245 | "terminado==0.18.1": "", 246 | "threadpoolctl==3.6.0": "", 247 | "tifffile==2025.3.30": "", 248 | "tinycss2==1.4.0": "", 249 | "tokenizers==0.21.1": "", 250 | "toml==0.10.2": "", 251 | "tomli==2.2.1": "", 252 | "torch==2.4.0": "", 253 | "torchaudio==2.4.0": "", 254 | "torchsde==0.2.6": "", 255 | "torchvision==0.19.0": "", 256 | "tornado==6.4.1": "", 257 | "tqdm==4.67.1": "", 258 | "traitlets==5.14.3": "", 259 | "trampoline==0.1.2": "", 260 | "transformers==4.48.3": "", 261 | "triton==3.0.0": "", 262 | "typer==0.15.2": "", 263 | "types-python-dateutil==2.9.0.20241206": "", 264 | "typing_extensions==4.13.2": "", 265 | "uri-template==1.3.0": "", 266 | "uritemplate==4.1.1": "", 267 | "urllib3==1.26.20": "", 268 | "uv==0.6.14": "", 269 | "wcwidth==0.2.13": "", 270 | "webcolors==24.11.1": "", 271 | "webencodings==0.5.1": "", 272 | "websocket-client==1.8.0": "", 273 | "# Editable Git install with no remote (workspace==0.1.0)": "", 274 | "-e /home/jeremy.degail": "", 275 | "wrapt==1.17.2": "", 276 | "xformers==0.0.27.post2": "", 277 | "yarl==1.20.0": "", 278 | "zipp==3.21.0": "" 279 | } 280 | } --------------------------------------------------------------------------------