├── aafactory
    ├── src
    │   ├── __init__.py
    │   ├── aafactory
    │   │   ├── __init__.py
    │   │   ├── act
    │   │   │   └── interface.py
    │   │   ├── react
    │   │   │   └── interface.py
    │   │   ├── schemas.py
    │   │   ├── main.py
    │   │   ├── fetcher
    │   │   │   └── fetching.py
    │   │   ├── prompts.py
    │   │   ├── create_gradio_ui.py
    │   │   ├── configuration.py
    │   │   ├── settings.py
    │   │   ├── utils
    │   │   │   ├── voice.py
    │   │   │   └── interface.py
    │   │   ├── style.py
    │   │   ├── chat
    │   │   │   └── interface.py
    │   │   ├── database
    │   │   │   └── manage_db.py
    │   │   ├── avatar
    │   │   │   └── interface.py
    │   │   └── comfyui
    │   │   │   └── video.py
    │   └── tests
    │   │   ├── test_chat
    │   │       ├── test_chat_interface.py
    │   │       └── test_mock_chat.py
    │   │   └── test_configuration.py
    ├── assets
    │   └── demo
    │   │   └── avatar.jpg
    └── workflows
    │   ├── text_to_speech_with_zonos.json
    │   └── audio_image_to_video_with_sonic.json
├── .env.sample
├── cloud_setup
    ├── runpod_templates
    │   └── comfy_ui
    │   │   ├── server-requirements.txt
    │   │   ├── README.md
    │   │   ├── docker-compose.yml
    │   │   ├── restore_snapshot.sh
    │   │   ├── start.sh
    │   │   ├── Dockerfile
    │   │   └── 2025-04-26_11-26-04_snapshot.json
    ├── zonos
    │   └── pyproject.toml
    ├── sonic
    │   ├── install_sonic.sh
    │   └── pyproject.toml
    ├── joyvasa
    │   └── setup_joyvasa.sh
    └── pyproject.toml
├── github_assets
    ├── napoleon_example.png
    └── hpi-logo-white.svg
├── .vscode
    └── launch.json
├── pyproject.toml
├── LICENSE.md
├── .gitignore
└── README.md


/aafactory/src/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/aafactory/src/aafactory/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.env.sample:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=
2 | ELEVENLABS_API_KEY=
3 | VOICE_ID=
4 | COMFYUI_SERVER_IP=
5 | COMFYUI_SERVER_PORT=


--------------------------------------------------------------------------------
/cloud_setup/runpod_templates/comfy_ui/server-requirements.txt:
--------------------------------------------------------------------------------
1 | piexif==1.1.3
2 | segment_anything
3 | huggingface_hub


--------------------------------------------------------------------------------
/aafactory/assets/demo/avatar.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AA-Factory/aafactory-prototype/HEAD/aafactory/assets/demo/avatar.jpg


--------------------------------------------------------------------------------
/github_assets/napoleon_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AA-Factory/aafactory-prototype/HEAD/github_assets/napoleon_example.png


--------------------------------------------------------------------------------
/aafactory/src/aafactory/act/interface.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | 
3 | 
4 | def create_act_interface():
5 |     with gr.Blocks() as act:
6 |         gr.Markdown("Coming Soon ...")


--------------------------------------------------------------------------------
/aafactory/src/aafactory/react/interface.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | 
3 | def create_react_interface():
4 |     with gr.Blocks() as react:
5 |         gr.Markdown("Coming Soon ...")


--------------------------------------------------------------------------------
/aafactory/src/aafactory/schemas.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 | 
3 | 
4 | class Settings(BaseModel):  # values entered in the Settings tab; settings.py stores them in TinyDB
5 |     comfy_server_url: str  # ComfyUI server address, as typed in the textbox
6 |     comfy_server_port: str  # kept as a string because it comes straight from a textbox
7 |     openai_api_key: str  # read by fetcher/fetching.py to create the OpenAI client
8 |     elevenlabs_api_key: str
9 | 


--------------------------------------------------------------------------------
/cloud_setup/runpod_templates/comfy_ui/README.md:
--------------------------------------------------------------------------------
 1 | # How to run the Docker container
 2 | 
 3 | ```bash 
 4 | docker compose up
 5 | ```
 6 | 
 7 | # How to push the Docker image to Docker Hub
 8 | 
 9 | ```bash
10 | docker push repo_name/aafactory-server:version
11 | ```


--------------------------------------------------------------------------------
/cloud_setup/zonos/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "workspace"
 3 | version = "0.1.0"
 4 | description = ""
 5 | authors = ["skip"]
 6 | readme = "README.md"
 7 | 
 8 | [tool.poetry.dependencies]
 9 | python = "^3.10"
10 | 
11 | [build-system]
12 | requires = ["poetry-core"]
13 | build-backend = "poetry.core.masonry.api"
14 | 


--------------------------------------------------------------------------------
/aafactory/src/tests/test_chat/test_chat_interface.py:
--------------------------------------------------------------------------------
1 | # Basic Initialization Test
2 | import pytest
3 | from aafactory.chat.interface import create_chat_interface#
4 | 
5 | def test_chat_interface_initialization():
6 |     """Test that the chat interface can be created."""
7 |     chat = create_chat_interface()
8 |     assert chat is not None


--------------------------------------------------------------------------------
/aafactory/src/tests/test_configuration.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | # Test paths live next to this file, separate from the production paths
 4 | TEST_DIR = Path(__file__).parent / "test_data"
 5 | TEST_DB_PATH = TEST_DIR / "test_avatar_db.json"
 6 | TEST_AVATAR_IMAGE_PATH = TEST_DIR / "test_avatar.png"
 7 | 
 8 | # Runs at import time: create the test data directory if it doesn't exist
 9 | TEST_DIR.mkdir(parents=True, exist_ok=True)
10 | 


--------------------------------------------------------------------------------
/cloud_setup/runpod_templates/comfy_ui/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   aafactory-server:
 3 |     build:
 4 |       context: .
 5 |       platforms:
 6 |         - linux/amd64
 7 |     image: jeney/aafactory-server:2.1
 8 |     environment:
 9 |       - SERVE_API_LOCALLY=true
10 |     ports:
11 |       - "8000:8000"
12 |       - "8188:8188"
13 |     volumes:
14 |       - ./data/comfyui/output:/comfyui/output
15 |       - ./data/runpod-volume:/runpod-volume
16 | 


--------------------------------------------------------------------------------
/aafactory/src/aafactory/main.py:
--------------------------------------------------------------------------------
 1 | from aafactory.src.avatar.acting import run_avatar
 2 | from aafactory.src.fetcher.fetching import run_fetcher
 3 | from aafactory.src.narrator.narration import run_narration
 4 | 
 5 | def main():
 6 |     current_environment = run_fetcher(simulation=True)
 7 |     current_situation = run_narration(current_environment)
 8 |     avatar_taken_actions = run_avatar(current_situation)
 9 |     
10 | 
11 | if __name__ == "__main__":
12 |     main()


--------------------------------------------------------------------------------
/cloud_setup/runpod_templates/comfy_ui/restore_snapshot.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e
 4 | 
 5 | SNAPSHOT_FILE=$(ls /*snapshot*.json 2>/dev/null | head -n 1)
 6 | 
 7 | if [ -z "$SNAPSHOT_FILE" ]; then
 8 |     echo "runpod-worker-comfy: No snapshot file found. Exiting..."
 9 |     exit 0
10 | fi
11 | 
12 | echo "runpod-worker-comfy: restoring snapshot: $SNAPSHOT_FILE"
13 | 
14 | comfy --workspace /comfyui node restore-snapshot "$SNAPSHOT_FILE" --pip-non-url
15 | 
16 | echo "runpod-worker-comfy: restored snapshot file: $SNAPSHOT_FILE"


--------------------------------------------------------------------------------
/cloud_setup/sonic/install_sonic.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | . /venv/bin/activate
 3 | pip install poetry
 4 | poetry install
 5 | 
 6 | huggingface-cli download LeonJoe13/Sonic --local-dir  ./ComfyUI/models/sonic
 7 | huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --local-dir  ./ComfyUI/models/sonic/stable-video-diffusion-img2vid-xt
 8 | huggingface-cli download openai/whisper-tiny --local-dir ./ComfyUI/models/sonic/whisper-tiny
 9 | mv ComfyUI/models/sonic/stable-video-diffusion-img2vid-xt/svd_xt.safetensors ComfyUI/models/checkpoints/
10 | mv ComfyUI/models/sonic/Sonic/* ComfyUI/models/sonic/


--------------------------------------------------------------------------------
/cloud_setup/runpod_templates/comfy_ui/start.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Use libtcmalloc for better memory management
 4 | TCMALLOC="$(ldconfig -p | grep -Po "libtcmalloc.so.\d" | head -n 1)"
 5 | export LD_PRELOAD="${TCMALLOC}"
 6 | 
 7 | # Download models
 8 | huggingface-cli download LeonJoe13/Sonic --local-dir  ./models/sonic
 9 | huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --local-dir  ./models/sonic/stable-video-diffusion-img2vid-xt
10 | huggingface-cli download openai/whisper-tiny --local-dir ./models/sonic/whisper-tiny
11 | mv models/sonic/stable-video-diffusion-img2vid-xt/svd_xt.safetensors models/checkpoints/
12 | mv models/sonic/Sonic/* models/sonic/
13 | 
14 | # Start ComfyUI
15 | python3 main.py --listen 0.0.0.0
16 | 


--------------------------------------------------------------------------------
/aafactory/src/aafactory/fetcher/fetching.py:
--------------------------------------------------------------------------------
 1 | from openai import BaseModel
 2 | from openai import OpenAI
 3 | import re
 4 | from aafactory.database.manage_db import get_settings
 5 | 
 6 | 
 7 | async def send_request_to_open_ai(messages: list[dict[str, str]]) -> BaseModel:
 8 |     """
 9 |     Send a request to the OpenAI API.
10 |     """
11 |     settings = get_settings()
12 |     client = OpenAI(api_key=settings.openai_api_key)
13 |     response = client.chat.completions.create(
14 |         model="gpt-4o", # gpt-4o-mini
15 |         messages=messages,
16 |         temperature=1.0,
17 |         top_p=1.0,
18 |     )
19 |     raw_content = response.choices[0].message.content
20 |     content_without_double_asterisks = re.sub(r'\*\*', '', raw_content)
21 |     return content_without_double_asterisks
22 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     // Use IntelliSense to learn about possible attributes.
 3 |     // Hover to view descriptions of existing attributes.
 4 |     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
 5 |     "version": "0.2.0",
 6 |     "configurations": [
 7 |       {
 8 |         "name": "Python Debugger: Current File",
 9 |         "type": "debugpy",
10 |         "request": "launch",
11 |         "program": "${file}",
12 |         "console": "integratedTerminal"
13 |       },
14 |       {
15 |         "name": "Run Gradio UI",
16 |         "type": "debugpy",
17 |         "request": "launch",
18 |         "program": "${workspaceFolder}/aafactory/src/aafactory/create_gradio_ui.py",
19 |         "console": "integratedTerminal",
20 |         "justMyCode": false
21 |       }
22 |     ]
23 |   }
24 |   


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "aafactory"
 3 | version = "0.1.0"
 4 | description = ""
 5 | authors = ["Jeremy de Gail <jeremy.degail@hoganlovells.com>"]
 6 | readme = "README.md"
 7 | packages = [{include = "aafactory", from = "./aafactory/src"}]
 8 | 
 9 | [tool.poetry.dependencies]
10 | python = "^3.13"
11 | pydantic = "^2.10.4"
12 | gradio = "^5.9.1"
13 | openai = "^1.58.1"
14 | python-dotenv = "^1.0.1"
15 | llama-cpp-python = "^0.3.5"
16 | loguru = "^0.7.3"
17 | pandas = "^2.2.3"
18 | docstring-parser = "^0.16"
19 | tinydb = "^4.8.2"
20 | line-profiler = "^4.2.0"
21 | matplotlib = "^3.10.0"
22 | soundfile = "^0.13.1"
23 | 
24 | 
25 | [tool.poetry.group.dev.dependencies]
26 | pytest = "^8.3.5"
27 | pytest-cov = "^6.1.1"
28 | 
29 | [build-system]
30 | requires = ["poetry-core"]
31 | build-backend = "poetry.core.masonry.api"
32 | 


--------------------------------------------------------------------------------
/aafactory/src/aafactory/prompts.py:
--------------------------------------------------------------------------------
 1 | AVATAR_DESCRIPTION = """
 2 | Aristotle was transmigrated to the world of the internet. He has a chip in his head that allows him to understand the internet and to be connected to it.
 3 | """
 4 | 
 5 | NEWS_PROMPT = """
 6 | Create fake news. Try to create realistic news.
 7 | """
 8 | 
 9 | SOCIAL_MEDIA_PROMPT = """
10 | Create fake posts from a social media. Try to create realistic posts.
11 | 
12 | Examples:
13 |     {
14 |       "text": "Just completed a challenging task and feeling great about it!",
15 |       "timestamp": "2023-10-01T12:00:00Z",
16 |       "user": {
17 |         "username": "HappyAvatar",
18 |         "description": "Just a virtual being navigating through life.",
19 |         "location": "Virtual World",
20 |         "followers_count": 150,
21 |         "following_count": 50,
22 |         "tweet_count": 300
23 |       }
24 | """


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright 2025 Jeremy de Gail
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/aafactory/src/tests/test_chat/test_mock_chat.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import aafactory.chat.interface as chat_interface
 3 | 
 4 | @pytest.mark.asyncio
 5 | async def test_chat_history_clears_on_avatar_change(mocker):
 6 |     chat_interface.CHAT_HISTORY.clear()
 7 |     chat_interface.CURRENT_AVATAR = None  # Reset avatar tracker
 8 | 
 9 |     mocker.patch("aafactory.chat.interface.send_request_to_open_ai", return_value="Hello, user!")
10 |     mocker.patch("aafactory.chat.interface.send_request_to_elevenlabs", return_value="mock_audio_path.mp3")
11 |     mocker.patch("aafactory.chat.interface.send_request_to_generate_video", return_value="mock_video_path.mp4")
12 | 
13 |     # First message with Avatar A
14 |     await chat_interface.send_request_to_llm(
15 |         "avatarA.png", "Hi Avatar A!", "AvatarA", "Friendly", "Knows stuff", 
16 |         "elevenlabs", "voiceidA", "path/to/recA", "transcriptA", "en"
17 |     )
18 |     
19 |     assert chat_interface.CHAT_HISTORY == [
20 |         ["Hi Avatar A!", "Hello, user!"],
21 |     ]
22 |     
23 |     # Second message with Avatar B
24 |     await chat_interface.send_request_to_llm(
25 |         "avatarB.png", "Hello Avatar B!", "AvatarB", "Serious", "Knows more stuff", 
26 |         "elevenlabs", "voiceidB", "path/to/recB", "transcriptB", "en"
27 |     )
28 |     assert chat_interface.CHAT_HISTORY == [
29 |         ["Hello Avatar B!", "Hello, user!"]
30 |     ]
31 | 


--------------------------------------------------------------------------------
/cloud_setup/joyvasa/setup_joyvasa.sh:
--------------------------------------------------------------------------------
 1 | # reference: https://github.com/jdh-algo/JoyVASA
 2 | 
 3 | # Install CUDA
 4 | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin
 5 | mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600
 6 | wget https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda-repo-ubuntu2204-12-6-local_12.6.3-560.35.05-1_amd64.deb
 7 | dpkg -i cuda-repo-ubuntu2204-12-6-local_12.6.3-560.35.05-1_amd64.deb
 8 | cp /var/cuda-repo-ubuntu2204-12-6-local/cuda-*-keyring.gpg /usr/share/keyrings/
 9 | apt-get update
10 | apt-get -y install cuda-toolkit-12-6
11 | 
12 | # Install git-lfs
13 | curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
14 | apt-get install git-lfs
15 | 
16 | # Install JoyVASA
17 | git clone https://huggingface.co/jdh-algo/JoyVASA
18 | cd JoyVASA
19 | pip install -r requirements.txt
20 | apt-get update  
21 | apt-get install ffmpeg -y
22 | 
23 | cd src/utils/dependencies/XPose/models/UniPose/ops
24 | python setup.py build install
25 | cd -
26 | 
27 | # Set up Git LFS before cloning the model repositories
28 | git lfs install
29 | 
30 | # Install Chinese-Hubert
31 | git clone https://huggingface.co/TencentGameMate/chinese-hubert-base
32 | 
33 | # Install Wav2Vec2
34 | git clone https://huggingface.co/facebook/wav2vec2-base-960h
35 | 
36 | # Install LivePortrait
37 | # (stray `cd -` calls removed: the last one switched into chinese-hubert-base before this download)
38 | huggingface-cli download KwaiVGI/LivePortrait --local-dir pretrained_weights --exclude "*.git*" "README.md" "docs"


--------------------------------------------------------------------------------
/aafactory/src/aafactory/create_gradio_ui.py:
--------------------------------------------------------------------------------
 1 | from aafactory.configuration import DB_PATH, AVATAR_VOICE_RECORDINGS_PATH
 2 | from aafactory.act.interface import create_act_interface
 3 | from aafactory.avatar.interface import create_avatar_setup_interface
 4 | from aafactory.chat.interface import create_chat_interface
 5 | from aafactory.react.interface import create_react_interface
 6 | from aafactory.settings import create_settings
 7 | from aafactory.style import CSS
 8 | from aafactory.utils.interface import create_utils_interface
 9 | import gradio as gr
10 | import asyncio
11 | 
12 | 
13 | async def create_gradio_interface():
14 |     DB_PATH.parent.mkdir(parents=True, exist_ok=True)
15 |     if not DB_PATH.exists():
16 |         DB_PATH.touch()
17 |     AVATAR_VOICE_RECORDINGS_PATH.mkdir(parents=True, exist_ok=True)
18 |     with gr.Blocks() as simulation:
19 |         with gr.Tabs():
20 |             with gr.Tab(label="Avatar"):  
21 |                 create_avatar_setup_interface()
22 |             with gr.Tab(label="Chat"):  
23 |                 create_chat_interface()
24 |             with gr.Tab(label="React"):
25 |                 create_react_interface()
26 |             with gr.Tab(label="Act"):
27 |                 create_act_interface()
28 |             with gr.Tab(label="Utils"):
29 |                 create_utils_interface()
30 |             with gr.Tab(label="Settings"):
31 |                 create_settings()
32 |     return simulation
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     app = asyncio.run(create_gradio_interface())
37 |     app.launch(share=False)
38 |     


--------------------------------------------------------------------------------
/aafactory/workflows/text_to_speech_with_zonos.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "12": {
 3 |     "inputs": {
 4 |       "audio": "voice.wav"
 5 |     },
 6 |     "class_type": "LoadAudio",
 7 |     "_meta": {
 8 |       "title": "LoadAudio"
 9 |     }
10 |   },
11 |   "13": {
12 |     "inputs": {
13 |       "audio": [
14 |         "24",
15 |         0
16 |       ]
17 |     },
18 |     "class_type": "PreviewAudio",
19 |     "_meta": {
20 |       "title": "PreviewAudio"
21 |     }
22 |   },
23 |   "22": {
24 |     "inputs": {
25 |       "happy": 1,
26 |       "sad": 0,
27 |       "disgust": 0,
28 |       "fear": 0,
29 |       "surprise": 0,
30 |       "anger": 0.02,
31 |       "other": 0.1,
32 |       "neutral": 0.2
33 |     },
34 |     "class_type": "ZonosEmotion",
35 |     "_meta": {
36 |       "title": "Zonos Emotion"
37 |     }
38 |   },
39 |   "24": {
40 |     "inputs": {
41 |       "speech": "Hey there! This is pretty cool right?",
42 |       "seed": 580,
43 |       "model_type": "Zyphra/Zonos-v0.1-transformer",
44 |       "language": "en-us",
45 |       "cfg_scale": 2,
46 |       "min_p": 0.15,
47 |       "speed": 1,
48 |       "disable_compiler": true,
49 |       "sample_text": "No, all that stuff, like you could have all the money in the world, all the followers, everything. It's just doesn't really make you happy.",
50 |       "speaker_noised": true,
51 |       "sample_audio": [
52 |         "12",
53 |         0
54 |       ],
55 |       "emotion": [
56 |         "22",
57 |         0
58 |       ]
59 |     },
60 |     "class_type": "ZonosGenerate",
61 |     "_meta": {
62 |       "title": "Zonos Generate"
63 |     }
64 |   }
65 | }


--------------------------------------------------------------------------------
/cloud_setup/runpod_templates/comfy_ui/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Stage 1: Base image with common dependencies
 2 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
 3 | 
 4 | # Prevents prompts from packages asking for user input during installation
 5 | ENV DEBIAN_FRONTEND=noninteractive
 6 | # Prefer binary wheels over source distributions for faster pip installations
 7 | ENV PIP_PREFER_BINARY=1
 8 | # Ensures output from python is printed immediately to the terminal without buffering
 9 | ENV PYTHONUNBUFFERED=1 
10 | # Speed up some cmake builds
11 | ENV CMAKE_BUILD_PARALLEL_LEVEL=8
12 | 
13 | # Install Python, git and other necessary tools
14 | RUN apt-get update && apt-get install -y \
15 |     python3.10 \
16 |     python3-pip \
17 |     libglib2.0-0 \
18 |     git \
19 |     wget \
20 |     libgl1 \
21 |     && ln -sf /usr/bin/python3.10 /usr/bin/python \
22 |     && ln -sf /usr/bin/pip3 /usr/bin/pip
23 | 
24 | # Clean up to reduce image size
25 | RUN apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*
26 | 
27 | # Install comfy-cli
28 | RUN pip install comfy-cli
29 | 
30 | # Install ComfyUI
31 | RUN /usr/bin/yes | comfy --workspace /comfyui install --cuda-version 11.8 --nvidia --version 0.3.30
32 | 
33 | # Install runpod
34 | RUN pip install runpod requests uv
35 | 
36 | # Go back to the root
37 | WORKDIR /
38 | 
39 | # Add scripts
40 | ADD pyproject.toml restore_snapshot.sh ./
41 | RUN chmod +x /restore_snapshot.sh
42 | # RUN uv sync --no-cache
43 | 
44 | # Optionally copy the snapshot file
45 | ADD *snapshot*.json /
46 | 
47 | # Restore the custom nodes recorded in the snapshot file
48 | RUN ./restore_snapshot.sh
49 | 
50 | # Change working directory to ComfyUI
51 | WORKDIR /comfyui
52 | 
53 | # Copy the server requirements and the start script into the ComfyUI folder
54 | COPY server-requirements.txt /comfyui/server-requirements.txt
55 | COPY start.sh /comfyui/start.sh
56 | RUN pip install -r server-requirements.txt
57 | 
58 | RUN chmod +x /comfyui/start.sh
59 | CMD ["/comfyui/start.sh"]


--------------------------------------------------------------------------------
/aafactory/src/aafactory/configuration.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import warnings
 3 | 
 4 | warnings.filterwarnings('ignore', message='Video does not have browser-compatible container or codec.*')
 5 | warnings.filterwarnings('ignore', message='You have not specified a value for the `type` parameter.*')
 6 | 
 7 | ROOT_DIR = Path(__file__).parent.parent.parent  # three levels up from this file: the top-level aafactory/ folder
 8 | DB_PATH = ROOT_DIR / "databases" / "avatar_db.json"  # TinyDB file; created by create_gradio_ui.py when missing
 9 | GENERATED_VOICE_PATH = ROOT_DIR / "assets/generated_voices/"
10 | GENERATED_VIDEO_PATH = ROOT_DIR / "assets/generated_videos/"
11 | WORKFLOW_FOLDER = ROOT_DIR / "workflows"
12 | DEFAULT_AVATAR_IMAGE_PATH = ROOT_DIR / "assets/demo/avatar.jpg"
13 | DEFAULT_VOICE_RECORDING_PATH = ROOT_DIR / "assets/demo/voice_recording.mp3"  # NOTE(review): not listed in the repository tree — confirm it is shipped
14 | AVATAR_IMAGES_PATH = ROOT_DIR / "assets/avatar_images"
15 | 
16 | SETTINGS_TABLE_NAME = "settings"  # database table names
17 | AVATAR_TABLE_NAME = "avatar"
18 | AVATAR_PAGE_SETTINGS_TABLE_NAME = "avatar_page_settings"
19 | AVATAR_VOICE_RECORDINGS_PATH = ROOT_DIR / "assets/avatar_voice_recordings"  # created by create_gradio_ui.py when missing
20 | 
21 | VOICE_MODELS = ["", "elevenlabs", "zonos"]
22 | VOICE_LANGUAGES = [
23 |     'af', 'am', 'an', 'ar', 'as', 'az', 'ba', 'bg', 'bn', 'bpy', 'bs', 'ca', 'cmn',
24 |     'cs', 'cy', 'da', 'de', 'el', 'en-029', 'en-gb', 'en-gb-scotland', 'en-gb-x-gbclan',
25 |     'en-gb-x-gbcwmd', 'en-gb-x-rp', 'en-us', 'eo', 'es', 'es-419', 'et', 'eu', 'fa',
26 |     'fa-latn', 'fi', 'fr-be', 'fr-ch', 'fr-fr', 'ga', 'gd', 'gn', 'grc', 'gu', 'hak',
27 |     'hi', 'hr', 'ht', 'hu', 'hy', 'hyw', 'ia', 'id', 'is', 'it', 'ja', 'jbo', 'ka',
28 |     'kk', 'kl', 'kn', 'ko', 'kok', 'ku', 'ky', 'la', 'lfn', 'lt', 'lv', 'mi', 'mk',
29 |     'ml', 'mr', 'ms', 'mt', 'my', 'nb', 'nci', 'ne', 'nl', 'om', 'or', 'pa', 'pap',
30 |     'pl', 'pt', 'pt-br', 'py', 'quc', 'ro', 'ru', 'ru-lv', 'sd', 'shn', 'si', 'sk',
31 |     'sl', 'sq', 'sr', 'sv', 'sw', 'ta', 'te', 'tn', 'tr', 'tt', 'ur', 'uz', 'vi',
32 |     'vi-vn-x-central', 'vi-vn-x-south', 'yue'
33 | ]
34 | 
35 | TEXT_TO_SPEECH_WITH_ZONOS_WORKFLOW_PATH = ROOT_DIR / "workflows" / "text_to_speech_with_zonos.json"


--------------------------------------------------------------------------------
/aafactory/src/aafactory/settings.py:
--------------------------------------------------------------------------------
 1 | from aafactory.configuration import DB_PATH
 2 | from aafactory.schemas import Settings
 3 | from dotenv import load_dotenv
 4 | import gradio as gr
 5 | import os
 6 | from loguru import logger
 7 | from tinydb import TinyDB
 8 | 
 9 | load_dotenv()
10 | 
11 | def create_settings():
12 |     with gr.Blocks() as settings:
13 |         gr.Markdown("## Settings")
14 |         with gr.Accordion("Comfy UI", open=False):
15 |             comfy_server_url = gr.Textbox(label="ComfyUI Server URL", value=os.getenv("COMFYUI_SERVER_URL"), interactive=True)
16 |             comfy_server_port = gr.Textbox(label="ComfyUI Server Port", value=os.getenv("COMFYUI_SERVER_PORT"), interactive=True)
17 |         with gr.Accordion("ElevenLabs", open=False):
18 |             elevenlabs_api_key = gr.Textbox(label="ElevenLabs API Key", value=os.getenv("ELEVENLABS_API_KEY"), interactive=True)
19 |         with gr.Accordion("LLM", open=False):
20 |             openai_api_key = gr.Textbox(label="OpenAI API Key", value=os.getenv("OPENAI_API_KEY"), interactive=True)
21 | 
22 |         submit_btn = gr.Button("Save Settings")
23 |         submit_btn.click(
24 |             fn=_save_settings_to_db, 
25 |             inputs=[comfy_server_url, comfy_server_port, openai_api_key, elevenlabs_api_key],
26 |             outputs=[comfy_server_url, comfy_server_port, openai_api_key, elevenlabs_api_key]
27 |         )
28 |     settings.load(
29 |         fn=_load_settings_from_db,
30 |         outputs=[comfy_server_url, comfy_server_port, openai_api_key, elevenlabs_api_key]
31 |     )
32 | 
33 | def _save_settings_to_db(*args):
34 |     settings = Settings(
35 |         comfy_server_url=args[0],
36 |         comfy_server_port=args[1],
37 |         openai_api_key=args[2],
38 |         elevenlabs_api_key=args[3]
39 |     )
40 |     db = TinyDB(DB_PATH)
41 |     db.table("settings").truncate()
42 |     db.table("settings").insert(settings.model_dump())
43 |     logger.success("Settings saved")
44 |     settings_dict = settings.model_dump()
45 |     return [settings_dict[key] for key in [
46 |         'comfy_server_url', 'comfy_server_port', 'openai_api_key',
47 |         'elevenlabs_api_key'
48 |     ]]
49 | 
50 | def _load_settings_from_db():
51 |     db = TinyDB(DB_PATH)
52 |     settings_dict = db.table("settings").get(doc_id=1)
53 |     if settings_dict is None:
54 |         return [None, None, None, None]
55 |     return [settings_dict[key] for key in [
56 |         'comfy_server_url', 'comfy_server_port', 'openai_api_key',
57 |         'elevenlabs_api_key'
58 |     ]]


--------------------------------------------------------------------------------
/aafactory/workflows/audio_image_to_video_with_sonic.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "1": {
  3 |     "inputs": {
  4 |       "sonic_unet": "unet.pth",
  5 |       "ip_audio_scale": 1,
  6 |       "use_interframe": true,
  7 |       "dtype": "bf16",
  8 |       "model": [
  9 |         "5",
 10 |         0
 11 |       ]
 12 |     },
 13 |     "class_type": "SONICTLoader",
 14 |     "_meta": {
 15 |       "title": "SONICTLoader"
 16 |     }
 17 |   },
 18 |   "2": {
 19 |     "inputs": {
 20 |       "seed": 1454942941,
 21 |       "inference_steps": 25,
 22 |       "dynamic_scale": 1,
 23 |       "fps": 25,
 24 |       "model": [
 25 |         "1",
 26 |         0
 27 |       ],
 28 |       "data_dict": [
 29 |         "6",
 30 |         0
 31 |       ]
 32 |     },
 33 |     "class_type": "SONICSampler",
 34 |     "_meta": {
 35 |       "title": "SONICSampler"
 36 |     }
 37 |   },
 38 |   "5": {
 39 |     "inputs": {
 40 |       "ckpt_name": "svd_xt.safetensors"
 41 |     },
 42 |     "class_type": "ImageOnlyCheckpointLoader",
 43 |     "_meta": {
 44 |       "title": "Image Only Checkpoint Loader (img2vid model)"
 45 |     }
 46 |   },
 47 |   "6": {
 48 |     "inputs": {
 49 |       "min_resolution": 320,
 50 |       "duration": 3.8000000000000003,
 51 |       "expand_ratio": 0.5,
 52 |       "clip_vision": [
 53 |         "5",
 54 |         1
 55 |       ],
 56 |       "vae": [
 57 |         "5",
 58 |         2
 59 |       ],
 60 |       "audio": [
 61 |         "9",
 62 |         0
 63 |       ],
 64 |       "image": [
 65 |         "7",
 66 |         0
 67 |       ],
 68 |       "weight_dtype": [
 69 |         "1",
 70 |         1
 71 |       ]
 72 |     },
 73 |     "class_type": "SONIC_PreData",
 74 |     "_meta": {
 75 |       "title": "SONIC_PreData"
 76 |     }
 77 |   },
 78 |   "7": {
 79 |     "inputs": {
 80 |       "image": "avatar.jpg",
 81 |       "upload": "image"
 82 |     },
 83 |     "class_type": "LoadImage",
 84 |     "_meta": {
 85 |       "title": "Load Image"
 86 |     }
 87 |   },
 88 |   "8": {
 89 |     "inputs": {
 90 |       "frame_rate": [
 91 |         "2",
 92 |         1
 93 |       ],
 94 |       "loop_count": 0,
 95 |       "filename_prefix": "AnimateDiff",
 96 |       "format": "video/h265-mp4",
 97 |       "pix_fmt": "yuv420p",
 98 |       "crf": 22,
 99 |       "save_metadata": false,
100 |       "pingpong": false,
101 |       "save_output": true,
102 |       "images": [
103 |         "2",
104 |         0
105 |       ],
106 |       "audio": [
107 |         "9",
108 |         0
109 |       ]
110 |     },
111 |     "class_type": "VHS_VideoCombine",
112 |     "_meta": {
113 |       "title": "Video Combine 🎥🅥🅗🅢"
114 |     }
115 |   },
116 |   "9": {
117 |     "inputs": {
118 |       "audio": "df8f1d5e094e4614a69044a4f191b3f6.mp3"
119 |     },
120 |     "class_type": "LoadAudio",
121 |     "_meta": {
122 |       "title": "LoadAudio"
123 |     }
124 |   }
125 | }


--------------------------------------------------------------------------------
/cloud_setup/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [project]
  2 | name = "workspace"
  3 | version = "0.1.0"
  4 | description = ""
  5 | authors = [{ name = "Reekomer" }]
  6 | readme = "README.md"
  7 | requires-python = ">=3.10"
  8 | dependencies = [
  9 |     "torch==2.4.0",
 10 |     "torchaudio==2.4.0",
 11 |     "torchvision==0.19.0",
 12 |     "accelerate==0.33.0",
 13 |     "aiohttp>=3.10.7",
 14 |     "aiosignal==1.3.1",
 15 |     "anyio==4.2.0",
 16 |     "argon2-cffi==23.1.0",
 17 |     "argon2-cffi-bindings==21.2.0",
 18 |     "arrow==1.3.0",
 19 |     "asttokens==2.4.1",
 20 |     "async-lru==2.0.4",
 21 |     "async-timeout==4.0.3",
 22 |     "attrs==23.2.0",
 23 |     "babel==2.14.0",
 24 |     "beautifulsoup4==4.12.3",
 25 |     "bleach==6.1.0",
 26 |     "build==1.2.1",
 27 |     "cachecontrol==0.14.0",
 28 |     "certifi==2022.12.7",
 29 |     "cffi==1.16.0",
 30 |     "charset-normalizer==2.1.1",
 31 |     "cleo==2.1.0",
 32 |     "cmake==3.25.0",
 33 |     "comm==0.2.1",
 34 |     "crashtest==0.4.1",
 35 |     "cryptography==43.0.0",
 36 |     "debugpy==1.8.0",
 37 |     "decorator==5.1.1",
 38 |     "defusedxml==0.7.1",
 39 |     "distlib==0.3.8",
 40 |     "dulwich==0.21.7",
 41 |     "einops==0.7.0",
 42 |     "entrypoints==0.4",
 43 |     "exceptiongroup==1.2.0",
 44 |     "executing==2.0.1",
 45 |     "fastjsonschema==2.19.1",
 46 |     "filelock==3.15.4",
 47 |     "fqdn==1.5.1",
 48 |     "frozenlist==1.4.1",
 49 |     "fsspec==2024.2.0",
 50 |     "gdown==5.1.0",
 51 |     "gitdb==4.0.11",
 52 |     "gitpython==3.1.41",
 53 |     "h11==0.14.0",
 54 |     "httpcore==1.0.2",
 55 |     "httpx==0.26.0",
 56 |     "huggingface-hub==0.24.5",
 57 |     "idna==3.4",
 58 |     "importlib-metadata==8.2.0",
 59 |     "installer==0.7.0",
 60 |     "isoduration==20.11.0",
 61 |     "jaraco-classes==3.4.0",
 62 |     "jedi==0.19.1",
 63 |     "jeepney==0.8.0",
 64 |     "jinja2==3.1.2",
 65 |     "json5==0.9.14",
 66 |     "jsonpointer==2.4",
 67 |     "jsonschema==4.21.1",
 68 |     "jsonschema-specifications==2023.12.1",
 69 |     "keyring==24.3.1",
 70 |     "lit==15.0.7",
 71 |     "lxml==5.1.0",
 72 |     "markupsafe==2.1.3",
 73 |     "matplotlib-inline==0.1.6",
 74 |     "matrix-client==0.4.0",
 75 |     "mistune==3.0.2",
 76 |     "more-itertools==10.4.0",
 77 |     "mpmath==1.3.0",
 78 |     "msgpack==1.0.8",
 79 |     "multidict==6.0.5",
 80 |     "nbclassic==1.0.0",
 81 |     "nbclient==0.9.0",
 82 |     "nbconvert==7.15.0",
 83 |     "nbformat==5.9.2",
 84 |     "nest-asyncio==1.6.0",
 85 |     "networkx==3.2.1",
 86 |     "numpy==1.26.3",
 87 |     "overrides==7.7.0",
 88 |     "packaging==23.2",
 89 |     "pandocfilters==1.5.1",
 90 |     "parso==0.8.3",
 91 |     "pexpect==4.9.0",
 92 |     "pillow==10.2.0",
 93 |     "pkginfo==1.11.1",
 94 |     "platformdirs==4.2.0",
 95 |     "poetry-core==1.9.0",
 96 |     "prometheus-client==0.19.0",
 97 |     "prompt-toolkit==3.0.43",
 98 |     "protobuf==5.27.3",
 99 |     "psutil==5.9.8",
100 |     "ptyprocess==0.7.0",
101 |     "pure-eval==0.2.2",
102 |     "pycparser==2.21",
103 |     "pygments==2.17.2",
104 |     "pysocks==1.7.1",
105 |     "python-dateutil==2.8.2",
106 |     "pyyaml==6.0.1",
107 |     "pyzmq==24.0.1",
108 |     "scipy==1.12.0",
109 |     "torch==2.4.0",
110 |     "transformers>=4.48.1",
111 |     "triton==3.0.0",
112 |     "xformers==0.0.27.post2",
113 |     "torchsde>=0.2.6",
114 |     "opencv-python>=4.11.0.86",
115 |     "jupyterlab-server==2.25.2",
116 |     "jupyterlab==4.1.0",
117 |     "omegaconf>=2.3.0",
118 |     "diffusers>=0.32.2",
119 |     "imageio>=2.37.0",
120 |     "kanjize>=1.5.0",
121 |     "soundfile>=0.13.1",
122 |     "kornia>=0.8.0",
123 |     "av>=14.2.0",
124 |     "spandrel>=0.4.1",
125 | 
126 | ]
127 | 
128 | [build-system]
129 | requires = ["setuptools", "wheel"]
130 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Byte-compiled / optimized / DLL files
  3 | __pycache__/
  4 | *.py[cod]
  5 | *$py.class
  6 | *.DS_Store
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | streaming_response_time_test.txt
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | *.py,cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | cover/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | db.sqlite3-journal
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | .pybuilder/
 80 | target/
 81 | 
 82 | # Jupyter Notebook
 83 | .ipynb_checkpoints
 84 | 
 85 | # IPython
 86 | profile_default/
 87 | ipython_config.py
 88 | 
 89 | # pyenv
 90 | #   For a library or package, you might want to ignore these files since the code is
 91 | #   intended to run in multiple environments; otherwise, check them in:
 92 | # .python-version
 93 | 
 94 | # pipenv
 95 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 96 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 97 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 98 | #   install all needed dependencies.
 99 | #Pipfile.lock
100 | 
101 | # poetry
102 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
104 | #   commonly ignored for libraries.
105 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106 | #poetry.lock
107 | 
108 | # pdm
109 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110 | #pdm.lock
111 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112 | #   in version control.
113 | #   https://pdm.fming.dev/#use-with-ide
114 | .pdm.toml
115 | 
116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117 | __pypackages__/
118 | 
119 | # Celery stuff
120 | celerybeat-schedule
121 | celerybeat.pid
122 | 
123 | # SageMath parsed files
124 | *.sage.py
125 | 
126 | # Environments
127 | .env
128 | .venv
129 | /env
130 | venv/
131 | ENV/
132 | env.bak/
133 | venv.bak/
134 | 
135 | # Spyder project settings
136 | .spyderproject
137 | .spyproject
138 | 
139 | # Rope project settings
140 | .ropeproject
141 | 
142 | # mkdocs documentation
143 | /site
144 | 
145 | # mypy
146 | .mypy_cache/
147 | .dmypy.json
148 | dmypy.json
149 | 
150 | # Pyre type checker
151 | .pyre/
152 | 
153 | # pytype static type analyzer
154 | .pytype/
155 | 
156 | # Cython debug symbols
157 | cython_debug/
158 | 
159 | # PyCharm
160 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
163 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
164 | .idea/
165 | .gradio/
166 | *.lprof
167 | 
168 | # output files that are generated by the workflows and saved locally.
169 | avatar_images/*
170 | avatar_voice_recordings/*
171 | generated_videos/*
172 | generated_voices/*
173 | 
174 | aafactory/databases/*


--------------------------------------------------------------------------------
/aafactory/src/aafactory/utils/voice.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from pathlib import Path
  3 | import uuid
  4 | from aafactory.comfyui.video import QueueHistory, queue_task, upload_files_to_comfyui_server
  5 | from aafactory.configuration import DB_PATH, TEXT_TO_SPEECH_WITH_ZONOS_WORKFLOW_PATH, GENERATED_VOICE_PATH
  6 | from aafactory.database.manage_db import get_settings
  7 | from aafactory.schemas import Settings
  8 | from loguru import logger
  9 | import requests
 10 | from tinydb import TinyDB
 11 | 
 12 | async def send_request_to_elevenlabs(prompt: str, voice_id: str) -> Path:
 13 |     # Get API key from settings
 14 |     db = TinyDB(DB_PATH)
 15 |     settings = db.table("settings").all()[-1]
 16 |     api_key = settings["elevenlabs_api_key"]
 17 |     voice_id = voice_id
 18 |     url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
 19 |     
 20 |     headers = {
 21 |         "Accept": "audio/mpeg",
 22 |         "Content-Type": "application/json",
 23 |         "xi-api-key": api_key
 24 |     }
 25 |     
 26 |     data = {
 27 |         "text": prompt,
 28 |         "model_id": "eleven_multilingual_v2",
 29 |         "voice_settings": {
 30 |             "stability": 0.5,
 31 |             "similarity_boost": 0.5
 32 |         }
 33 |     }
 34 |     
 35 |     try:
 36 |         response = requests.post(url, json=data, headers=headers)
 37 |         response.raise_for_status()
 38 |         
 39 |         # Save the audio file
 40 |         output_path = GENERATED_VOICE_PATH / f"{uuid.uuid4().hex}.mp3"
 41 |         output_path.parent.mkdir(parents=True, exist_ok=True)
 42 |         with open(output_path, "wb") as f:
 43 |             f.write(response.content)
 44 |             
 45 |         return output_path
 46 |         
 47 |     except requests.exceptions.RequestException as e:
 48 |         logger.error(f"Error making request to ElevenLabs: {e}")
 49 |         raise e
 50 | 
 51 | 
 52 | async def send_request_to_zonos(text_response: str, voice_language: str, voice_recording_path: str, audio_transcript: str) -> Path:
 53 |     """
 54 |     Send a request to the server to generate a video.
 55 |     """
 56 |     settings = get_settings()
 57 |     voice_recording_path = Path(voice_recording_path)
 58 |     upload_files_to_comfyui_server([voice_recording_path])
 59 |     workflow = _create_text_to_speech_with_zonos_workflow(text_response, voice_language, voice_recording_path, audio_transcript)
 60 |     queue_history = await queue_task(workflow, settings)
 61 |     audio_url = _get_audio_url(settings, queue_history)
 62 |     output_path = _save_audio_to_file(audio_url)
 63 |     return output_path
 64 | 
 65 | 
 66 | def _create_text_to_speech_with_zonos_workflow(text_response: str, voice_language: str, voice_recording_path: Path, audio_transcript: str) -> dict:
 67 |     """
 68 |     Create a workflow for the text to speech with Zonos.
 69 |     """
 70 |     with open(TEXT_TO_SPEECH_WITH_ZONOS_WORKFLOW_PATH, "r") as f:
 71 |         workflow = json.load(f)
 72 |     workflow["12"]["inputs"]["audio"] = voice_recording_path.name
 73 |     workflow["24"]["inputs"]["speech"] = text_response
 74 |     workflow["24"]["inputs"]["language"] = voice_language
 75 |     workflow["24"]["inputs"]["sample_text"] = audio_transcript
 76 |     return {"prompt": workflow}
 77 | 
 78 | 
 79 | def _get_audio_url(settings: Settings, queue_history: QueueHistory) -> str:
 80 |     """
 81 |     Get the audio URL from the history response.
 82 |     """
 83 |     output_info = queue_history.response.get(queue_history.prompt_id, {}).get('outputs', {}).get('13', {}).get('audio', [{}])[0]
 84 |     filename = output_info.get('filename', 'unknown.mp3')
 85 |     output_url = f"{settings.comfy_server_url}/api/view?filename={filename}&subfolder=&type=temp"
 86 |     logger.success(f"Output URL: {output_url}")
 87 |     return output_url
 88 | 
 89 | 
 90 | def _save_audio_to_file(audio_url: str) -> Path:
 91 |     """
 92 |     Save the audio to a file.
 93 |     """
 94 |     output_path = GENERATED_VOICE_PATH / f"{uuid.uuid4().hex}.mp3"
 95 |     output_path.parent.mkdir(parents=True, exist_ok=True)
 96 |     response = requests.get(audio_url)
 97 |     with open(output_path, "wb") as f:
 98 |         f.write(response.content)
 99 |     logger.success(f"Audio saved to {output_path}")
100 |     return output_path


--------------------------------------------------------------------------------
/aafactory/src/aafactory/utils/interface.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import uuid
 3 | from aafactory.comfyui.video import send_request_to_generate_video
 4 | from aafactory.configuration import AVATAR_PAGE_SETTINGS_TABLE_NAME, DB_PATH, DEFAULT_AVATAR_IMAGE_PATH, GENERATED_VOICE_PATH, VOICE_MODELS
 5 | from aafactory.database.manage_db import AVATAR_TABLE_NAME
 6 | from aafactory.utils.voice import send_request_to_elevenlabs
 7 | import gradio as gr
 8 | from tinydb import TinyDB
 9 | import soundfile as sf
10 | 
11 | 
def create_utils_interface():
    """
    Build the Gradio "utils" page and return its ``gr.Blocks`` container.

    The page holds two collapsed accordions:
      * "Audio to Video": an audio input plus a button wired to
        ``_generate_video_from_audio``.
      * "Script to Video": a script text area, voice model dropdown and
        voice id box plus a button wired to ``_generate_video_from_script``.

    On page load ``_load_avatar_infos_for_chat`` fills the video and hidden
    image-path components of both accordions.
    """
    with gr.Blocks() as utils_page:
        # --- Audio to Video -------------------------------------------------
        with gr.Accordion("Audio to Video", open=False):
            with gr.Row():
                with gr.Column():
                    # Hidden textbox carries the avatar image path to the callback.
                    audio_image_box = gr.Textbox(
                        value=DEFAULT_AVATAR_IMAGE_PATH,
                        visible=False,
                    )
                    audio_video_player = gr.Video(
                        value=DEFAULT_AVATAR_IMAGE_PATH,
                        autoplay=True,
                    )
                with gr.Column():
                    audio_input = gr.Audio(label="Audio File")
            audio_generate_button = gr.Button("Generate Video")
            audio_generate_button.click(
                fn=_generate_video_from_audio,
                inputs=[audio_input, audio_image_box],
                outputs=[audio_video_player],
            )

        # --- Script to Video ------------------------------------------------
        with gr.Accordion("Script to Video", open=False):
            with gr.Row():
                with gr.Column():
                    script_image_box = gr.Textbox(
                        value=DEFAULT_AVATAR_IMAGE_PATH,
                        visible=False,
                    )
                    script_video_player = gr.Video(
                        value=DEFAULT_AVATAR_IMAGE_PATH,
                        autoplay=True,
                    )
                with gr.Column():
                    gr.Markdown("### Avatar Script")
                    script_box = gr.TextArea()
                    gr.Markdown("### Voice Model")
                    voice_model_dropdown = gr.Dropdown(
                        choices=VOICE_MODELS,
                        value="elevenlabs",
                        interactive=True,
                        info="Select the voice model you want to use",
                    )
                    gr.Markdown("### Voice ID")
                    voice_id_box = gr.Textbox(
                        show_label=False,
                        interactive=True,
                        info="Enter the voice id you want to use",
                    )
            script_generate_button = gr.Button("Generate Video")
            script_generate_button.click(
                fn=_generate_video_from_script,
                inputs=[script_box, script_image_box, voice_model_dropdown, voice_id_box],
                outputs=[script_video_player],
            )

        # Output order must match the tuple returned by the loader.
        utils_page.load(
            fn=_load_avatar_infos_for_chat,
            outputs=[
                audio_video_player,
                audio_image_box,
                script_video_player,
                script_image_box,
            ],
        )
    return utils_page
44 | 
45 | 
async def _generate_video_from_audio(audio_file_bytes: tuple, avatar_image_str: str) -> str:
    """
    Save the audio coming from the Gradio audio component and animate the avatar with it.

    Args:
        audio_file_bytes: ``(sample_rate, samples)`` pair as delivered by the
            audio component, or ``None`` when no audio was provided.
            ``samples`` is presumably a numpy array (it must support
            ``astype``, ``max`` and ``min``); confirm against the component
            configuration.
        avatar_image_str: Path of the avatar image, as a string.

    Returns:
        The value returned by ``send_request_to_generate_video``.

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio_file_bytes is None:
        # Surface a readable message in the UI instead of an unpacking TypeError.
        raise gr.Error("Please provide an audio file before generating a video.")

    sample_rate, audio_data = audio_file_bytes  # Unpack the tuple
    if audio_data.size == 0:
        raise gr.Error("The provided audio file is empty.")

    audio_file_path = GENERATED_VOICE_PATH / f"{uuid.uuid4().hex}.mp3"
    audio_file_path.parent.mkdir(parents=True, exist_ok=True)

    # Normalize audio data to float32 between -1 and 1. Samples already inside
    # that range are left untouched; anything louder is scaled by its peak.
    audio_data = audio_data.astype('float32')
    peak = max(abs(audio_data.max()), abs(audio_data.min()))
    if peak > 1.0:
        audio_data = audio_data / peak

    # Save using soundfile with proper settings
    sf.write(
        str(audio_file_path),
        audio_data,
        sample_rate,
        format='MP3'
    )

    avatar_image_path = Path(avatar_image_str)
    video_response = await send_request_to_generate_video(avatar_image_path, audio_file_path)
    return video_response
68 | 
69 | 
async def _generate_video_from_script(avatar_script: str, avatar_image_str: str, voice_model: str, voice_id: str) -> str:
    """
    Turn a script into speech and animate the avatar with the generated audio.

    Args:
        avatar_script: Text the avatar should speak.
        avatar_image_str: Path of the avatar image, as a string.
        voice_model: Name of the voice backend; only ``"elevenlabs"`` is
            handled here.
        voice_id: Voice identifier forwarded to ElevenLabs.

    Returns:
        The value returned by ``send_request_to_generate_video``.

    Raises:
        gr.Error: If ``voice_model`` is not ``"elevenlabs"``.
    """
    if voice_model != "elevenlabs":
        # Previously any other model fell through and crashed with an
        # UnboundLocalError on the audio variable.
        raise gr.Error(f"Voice model '{voice_model}' is not supported for script to video.")

    audio_response = await send_request_to_elevenlabs(avatar_script, voice_id)
    # Pass a Path, matching how _generate_video_from_audio calls the same helper.
    avatar_image_path = Path(avatar_image_str)
    video_response = await send_request_to_generate_video(avatar_image_path, audio_response)
    return video_response
75 | 
76 | 
def _load_avatar_infos_for_chat() -> tuple:
    """
    Look up the image path of the currently selected avatar.

    The avatar name is read from the page-settings record with ``doc_id=1``
    and matched against the ``name`` field of the avatar table.

    Returns:
        A 4-tuple holding the same path four times, one value per component
        listed in the ``load`` outputs of the utils page. The path is the
        avatar's ``avatar_image_path`` (``""`` when the record lacks it), or
        ``DEFAULT_AVATAR_IMAGE_PATH`` when the page settings or the avatar
        cannot be found.
    """
    # The context manager closes the database once the lookups are done.
    with TinyDB(DB_PATH) as db:
        avatar_page_settings = db.table(AVATAR_PAGE_SETTINGS_TABLE_NAME).get(doc_id=1)  # TinyDB doc ids start at 1
        avatar_info = None
        # No settings record exists until one has been stored; use the default then.
        if avatar_page_settings is not None:
            avatar_name = avatar_page_settings.get("avatar_name")
            avatar_info = db.table(AVATAR_TABLE_NAME).get(lambda x: x.get("name") == avatar_name)

    if avatar_info:
        image_path = avatar_info.get("avatar_image_path", "")
    else:
        image_path = DEFAULT_AVATAR_IMAGE_PATH
    return image_path, image_path, image_path, image_path


--------------------------------------------------------------------------------
/cloud_setup/sonic/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [tool.poetry]
  2 | name = "workspace"
  3 | version = "0.1.0"
  4 | description = ""
  5 | authors = ["None"]
  6 | readme = "README.md"
  7 | 
  8 | [tool.poetry.dependencies]
  9 | python = "^3.10"
 10 | torch = "^2.4.0"
 11 | torchaudio = "^2.4.0"
 12 | torchvision = "^0.19.0"
 13 | accelerate = "0.33.0"
 14 | aiohttp = "3.9.3"
 15 | aiosignal = "1.3.1"
 16 | anyio = "4.2.0"
 17 | argon2-cffi = "23.1.0"
 18 | argon2-cffi-bindings = "21.2.0"
 19 | arrow = "1.3.0"
 20 | asttokens = "2.4.1"
 21 | async-lru = "2.0.4"
 22 | async-timeout = "4.0.3"
 23 | attrs = "23.2.0"
 24 | babel = "2.14.0"
 25 | beautifulsoup4 = "4.12.3"
 26 | bleach = "6.1.0"
 27 | build = "1.2.1"
 28 | cachecontrol = "0.14.0"
 29 | certifi = "2022.12.7"
 30 | cffi = "1.16.0"
 31 | charset-normalizer = "2.1.1"
 32 | cleo = "2.1.0"
 33 | cmake = "3.25.0"
 34 | comm = "0.2.1"
 35 | crashtest = "0.4.1"
 36 | cryptography = "43.0.0"
 37 | debugpy = "1.8.0"
 38 | decorator = "5.1.1"
 39 | defusedxml = "0.7.1"
 40 | distlib = "0.3.8"
 41 | dulwich = "0.21.7"
 42 | einops = "0.7.0"
 43 | entrypoints = "0.4"
 44 | exceptiongroup = "1.2.0"
 45 | executing = "2.0.1"
 46 | fastjsonschema = "2.19.1"
 47 | filelock = "3.15.4"
 48 | fqdn = "1.5.1"
 49 | frozenlist = "1.4.1"
 50 | fsspec = "2024.2.0"
 51 | gdown = "5.1.0"
 52 | gitdb = "4.0.11"
 53 | gitpython = "3.1.41"
 54 | h11 = "0.14.0"
 55 | httpcore = "1.0.2"
 56 | httpx = "0.26.0"
 57 | huggingface-hub = "0.24.5"
 58 | idna = "3.4"
 59 | importlib-metadata = "8.2.0"
 60 | installer = "0.7.0"
 61 | ipykernel = "6.29.1"
 62 | ipython = "8.21.0"
 63 | ipython-genutils = "0.2.0"
 64 | ipywidgets = "8.1.1"
 65 | isoduration = "20.11.0"
 66 | jaraco-classes = "3.4.0"
 67 | jedi = "0.19.1"
 68 | jeepney = "0.8.0"
 69 | jinja2 = "3.1.2"
 70 | json5 = "0.9.14"
 71 | jsonpointer = "2.4"
 72 | jsonschema = "4.21.1"
 73 | jsonschema-specifications = "2023.12.1"
 74 | jupyter-archive = "3.4.0"
 75 | jupyter-events = "0.9.0"
 76 | jupyter-highlight-selected-word = "0.2.0"
 77 | jupyter-lsp = "2.2.2"
 78 | jupyter-nbextensions-configurator = "0.6.3"
 79 | jupyter-client = "7.4.9"
 80 | jupyter-contrib-core = "0.4.2"
 81 | jupyter-contrib-nbextensions = "0.7.0"
 82 | jupyter-core = "5.7.1"
 83 | jupyter-server = "2.12.5"
 84 | jupyter-server-terminals = "0.5.2"
 85 | jupyterlab = "4.1.0"
 86 | jupyterlab-widgets = "3.0.9"
 87 | jupyterlab-pygments = "0.3.0"
 88 | jupyterlab-server = "2.25.2"
 89 | keyring = "24.3.1"
 90 | kornia = "0.7.3"
 91 | kornia-rs = "0.1.5"
 92 | lit = "15.0.7"
 93 | lxml = "5.1.0"
 94 | markupsafe = "2.1.3"
 95 | matplotlib-inline = "0.1.6"
 96 | matrix-client = "0.4.0"
 97 | mistune = "3.0.2"
 98 | more-itertools = "10.4.0"
 99 | mpmath = "1.3.0"
100 | msgpack = "1.0.8"
101 | multidict = "6.0.5"
102 | nbclassic = "1.0.0"
103 | nbclient = "0.9.0"
104 | nbconvert = "7.15.0"
105 | nbformat = "5.9.2"
106 | nest-asyncio = "1.6.0"
107 | networkx = "3.2.1"
108 | notebook = "6.5.5"
109 | notebook-shim = "0.2.3"
110 | numpy = "1.26.3"
111 | overrides = "7.7.0"
112 | packaging = "23.2"
113 | pandocfilters = "1.5.1"
114 | parso = "0.8.3"
115 | pexpect = "4.9.0"
116 | pillow = "10.2.0"
117 | pkginfo = "1.11.1"
118 | platformdirs = "4.2.0"
119 | poetry = "1.8.3"
120 | poetry-core = "1.9.0"
121 | poetry-plugin-export = "1.8.0"
122 | prometheus-client = "0.19.0"
123 | prompt-toolkit = "3.0.43"
124 | protobuf = "5.27.3"
125 | psutil = "5.9.8"
126 | ptyprocess = "0.7.0"
127 | pure-eval = "0.2.2"
128 | pycparser = "2.21"
129 | pygments = "2.17.2"
130 | pyproject-hooks = "1.1.0"
131 | pysocks = "1.7.1"
132 | python-dateutil = "2.8.2"
133 | python-json-logger = "2.0.7"
134 | pyyaml = "6.0.1"
135 | pyzmq = "24.0.1"
136 | rapidfuzz = "3.9.6"
137 | referencing = "0.33.0"
138 | regex = "2023.12.25"
139 | requests = "2.31.0"
140 | requests-toolbelt = "1.0.0"
141 | rfc3339-validator = "0.1.4"
142 | rfc3986-validator = "0.1.1"
143 | rpds-py = "0.17.1"
144 | safetensors = "0.4.2"
145 | scipy = "1.12.0"
146 | secretstorage = "3.3.3"
147 | send2trash = "1.8.2"
148 | sentencepiece = "0.2.0"
149 | shellingham = "1.5.4"
150 | six = "1.16.0"
151 | smmap = "5.0.1"
152 | sniffio = "1.3.0"
153 | soundfile = "0.12.1"
154 | soupsieve = "2.5"
155 | spandrel = "0.3.4"
156 | stack-data = "0.6.3"
157 | sympy = "1.12"
158 | terminado = "0.18.0"
159 | timm = "1.0.8"
160 | tinycss2 = "1.2.1"
161 | tokenizers = "0.15.1"
162 | tomli = "2.0.1"
163 | tomlkit = "0.13.0"
164 | torchsde = "0.2.6"
165 | tornado = "6.4"
166 | tqdm = "4.66.1"
167 | traitlets = "5.14.1"
168 | trampoline = "0.1.2"
169 | transformers = "4.37.2"
170 | triton = "^3.0.0"
171 | trove-classifiers = "2024.7.2"
172 | types-python-dateutil = "2.8.19.20240106"
173 | typing-extensions = "4.8.0"
174 | uri-template = "1.3.0"
175 | urllib3 = "1.26.13"
176 | virtualenv = "20.26.3"
177 | wcwidth = "0.2.13"
178 | webcolors = "1.13"
179 | webencodings = "0.5.1"
180 | websocket-client = "1.7.0"
181 | widgetsnbextension = "4.0.9"
182 | xformers = "^0.0.27.post2"
183 | yarl = "1.9.4"
184 | zipp = "3.19.2"
185 | insightface = "^0.7.3"
186 | 
187 | 
188 | [build-system]
189 | requires = ["poetry-core"]
190 | build-backend = "poetry.core.masonry.api"
191 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # AI Avatar Factory
  2 | 
  3 | ⚡ AI Avatar Factory is an interface for creating and managing AI avatars. ⚡
  4 | 
  5 | [![Website](https://img.shields.io/badge/website-000000?style=for-the-badge&logo=AAFactory.xyz&logoColor=white
  6 | )](https://aafactory.xyz/)
  7 | [![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/C2Rjy8Q2ER)
  8 | 
  9 | ![AAFactory Screenshot](https://github.com/Reekomer/aafactory/blob/main/github_assets/napoleon_example.png?raw=true)
 10 | 
 11 | 
 12 | # IMPORTANT
 13 | - We are building a better UI for the project here: https://github.com/orgs/AA-Factory/repositories.
 14 | This repository is deprecated and will no longer receive new features.
 15 | 
 16 | ## File Tree
 17 | 
 18 | ```bash
 19 | ├── LICENSE.md
 20 | ├── README.md
 21 | ├── aafactory
 22 | │   ├── assets
 23 | │   │   ├── avatar_images
 24 | │   │   │   └── ...png
 25 | │   │   ├── avatar_voice_recordings
 26 | │   │   │   └── ...wav
 27 | │   │   ├── demo
 28 | │   │   │   ├── avatar.jpg
 29 | │   │   │   ├── avatar.mp4
 30 | │   │   │   └── voice_recording.mp3
 31 | │   │   ├── generated_video
 32 | │   │   ├── generated_videos
 33 | │   │   │   └── ...mp4
 34 | │   │   ├── generated_voice
 35 | │   │   │   └── ...mp3
 36 | │   │   └── generated_voices
 37 | │   │       └── ...mp3
 38 | │   ├── databases
 39 | │   │   └── avatar_db.json
 40 | │   ├── src
 41 | │   │   ├── __init__.py
 42 | │   │   └── aafactory
 43 | │   │       ├── __init__.py
 44 | │   │       ├── act
 45 | │   │       │   └── interface.py
 46 | │   │       ├── avatar
 47 | │   │       │   └── interface.py
 48 | │   │       ├── chat
 49 | │   │       │   └── interface.py
 50 | │   │       ├── comfyui
 51 | │   │       │   └── video.py
 52 | │   │       ├── configuration.py
 53 | │   │       ├── create_gradio_ui.py
 54 | │   │       ├── database
 55 | │   │       │   └── manage_db.py
 56 | │   │       ├── fetcher
 57 | │   │       │   ├── environment_objects.py
 58 | │   │       │   └── fetching.py
 59 | │   │       ├── main.py
 60 | │   │       ├── prompts.py
 61 | │   │       ├── react
 62 | │   │       │   └── interface.py
 63 | │   │       ├── schemas.py
 64 | │   │       ├── settings.py
 65 | │   │       ├── style.py
 66 | │   │       └── utils
 67 | │   │           ├── interface.py
 68 | │   │           └── voice.py
 69 | │   ├── tests
 70 | │   └── workflows
 71 | │       ├── audio_image_to_video_with_sonic.json
 72 | │       └── text_to_speech_with_zonos.json
 73 | ├── cloud_setup
 74 | │   ├── joyvasa
 75 | │   │   └── setup_joyvasa.sh
 76 | │   ├── pyproject.toml
 77 | │   ├── sonic
 78 | │   │   ├── install_sonic.sh
 79 | │   │   └── pyproject.toml
 80 | │   ├── uv.lock
 81 | │   └── zonos
 82 | │       └── pyproject.toml
 83 | ├── github_assets
 84 | │   ├── hpi-logo-white.svg
 85 | │   └── napoleon_example.png
 86 | ├── poetry.lock
 87 | ├── pyproject.toml
 88 | └── file_tree.txt
 89 | ```
 90 | 
 91 | 
 92 | ## Tutorial:
 93 | - Youtube tutorial: https://www.youtube.com/watch?v=MGmBf7OsFJk
 94 | ## Installation
 95 | 
 96 | Install the required packages by running the following commands:
 97 | 
 98 | ```bash
 99 | pip install poetry
100 | ```
101 | 
102 | ```bash
103 | poetry install
104 | ```
105 | 
106 | ### ComfyUI
107 | - Use Video Helper Suite v1.5.0 (can be selected in ComfyManager)
108 | 
109 | ## Run the application
110 | 
111 | If you use VSCode, you can run the application by clicking on the `Run and Debug` button and selecting `Python: Run and Debug` and then `Run Gradio UI`.
112 | 
113 | If you don't use VSCode, you can run the application by running the following command:
114 | 
115 | ```bash
116 | python aafactory/src/aafactory/create_gradio_ui.py
117 | ```
118 | 
119 | You will also need:
120 | - ElevenLabs API key
121 | - OpenAI API key
122 | - ComfyUI server URL
123 | 
124 | For ComfyUI, the workflows are defined in the `workflows` folder. You need to make sure the required custom nodes are installed. A more detailed guide will be available soon.
125 | 
126 | ### Runpod Template for ComfyUI
127 | 
128 | - [Template](https://runpod.io/console/deploy?template=laidmkkjli&ref=uw67f0zc)
129 | 
130 | 
131 | ### Current Tech Stack:
132 | 
133 | - Gradio – Frontend
134 | 
135 | - ComfyUI – Backend
136 | 
137 | - OpenAI API – LLM
138 | 
139 | - ElevenLabs – TTS
140 | 
141 | - Flux – Text-to-Image (Avatar Generation)
142 | 
143 | - Sonic – Audio-Driven Video Generation
144 | 
145 | 
146 | ## More Examples
147 | 
148 | See our website for more examples: [AAFactory.xyz](https://aafactory.xyz/)
149 | 
150 | 
151 | ## Incoming Features
152 | 
153 | - [ ] Add support for Hugging Face models (Text to Speech and Text to Text)
154 | - [x] Create documentation for ComfyUI cloud hosting
155 | - [ ] Improve ComfyUI cloud hosting setup
156 | - [x] Enable users to manage several avatars
157 | - [ ] Enable users to easily share an avatar's setup with others
158 | - [ ] Add feature to let an avatar react to a YouTube video
159 | - [ ] Add microphone button for direct chat with Avatar
160 | 
161 | 
162 | ## Partners
163 | 
164 | ![HPI Logo](https://github.com/Reekomer/aafactory/blob/main/github_assets/hpi-logo-white.svg?raw=true)
165 | 


--------------------------------------------------------------------------------
/aafactory/src/aafactory/style.py:
--------------------------------------------------------------------------------
  1 | CSS = """
  2 | /* Main theme colors */
  3 | :root {
  4 |     --primary-color: #73111e;
  5 |     --secondary-color: #18A1FA;
  6 |     --accent-color: #06BEE1;
  7 |     --background-dark: #0F172A;
  8 |     --background-light: #1E293B;
  9 |     --text-light: #F8FAFC;
 10 |     --text-muted: #94A3B8;
 11 |     --border-color: rgba(45, 127, 249, 0.2);
 12 |     --shadow-color: rgba(45, 127, 249, 0.1);
 13 |     --gradient-bg: linear-gradient(135deg, var(--background-dark) 0%, var(--background-light) 100%);
 14 | }
 15 | 
 16 | /* Global styles */
 17 | .gradio-container {
 18 |     background: var(--gradient-bg) !important;
 19 |     color: var(--text-light) !important;
 20 |     font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
 21 | }
 22 | 
 23 | /* Headers */
 24 | h1, h2, h3 {
 25 |     color: var(--text-light) !important;
 26 |     font-weight: 600 !important;
 27 |     letter-spacing: -0.02em !important;
 28 |     margin-bottom: 1rem !important;
 29 | }
 30 | 
 31 | /* Buttons */
 32 | button, .button {
 33 |     background: var(--primary-color) !important;
 34 |     border: none !important;
 35 |     color: var(--text-light) !important;
 36 |     padding: 0.5rem 1rem !important;
 37 |     border-radius: 8px !important;
 38 |     font-weight: 500 !important;
 39 |     transition: all 0.2s ease !important;
 40 |     box-shadow: 0 2px 4px var(--shadow-color) !important;
 41 | }
 42 | 
 43 | button:hover, .button:hover {
 44 |     background: var(--secondary-color) !important;
 45 |     transform: translateY(-1px) !important;
 46 |     box-shadow: 0 4px 8px var(--shadow-color) !important;
 47 | }
 48 | 
 49 | /* Input fields */
 50 | input, textarea {
 51 |     background: var(--background-light) !important;
 52 |     border: 1px solid var(--border-color) !important;
 53 |     color: var(--text-light) !important;
 54 |     border-radius: 8px !important;
 55 |     padding: 0.75rem !important;
 56 |     transition: all 0.2s ease !important;
 57 | }
 58 | 
 59 | input:focus, textarea:focus {
 60 |     border-color: var(--primary-color) !important;
 61 |     box-shadow: 0 0 0 2px var(--shadow-color) !important;
 62 |     outline: none !important;
 63 | }
 64 | 
 65 | /* Labels */
 66 | label {
 67 |     color: var(--text-muted) !important;
 68 |     font-size: 0.875rem !important;
 69 |     font-weight: 500 !important;
 70 |     margin-bottom: 0.5rem !important;
 71 | }
 72 | 
 73 | /* Chat interface */
 74 | .chatbot {
 75 |     background: var(--background-light) !important;
 76 |     border: 1px solid var(--border-color) !important;
 77 |     border-radius: 12px !important;
 78 |     box-shadow: 0 4px 6px var(--shadow-color) !important;
 79 |     overflow: hidden !important;
 80 | }
 81 | 
 82 | .message {
 83 |     background: var(--background-dark) !important;
 84 |     border-radius: 8px !important;
 85 |     margin: 0.5rem !important;
 86 |     padding: 1rem !important;
 87 |     color: var(--text-light) !important;
 88 | }
 89 | 
 90 | /* Dropdowns and Selects */
 91 | select, .select {
 92 |     background: var(--background-light) !important;
 93 |     border: 1px solid var(--border-color) !important;
 94 |     border-radius: 8px !important;
 95 |     color: var(--text-light) !important;
 96 |     padding: 0.5rem !important;
 97 | }
 98 | 
 99 | /* Progress bars */
100 | .progress-bar {
101 |     background: var(--primary-color) !important;
102 |     height: 4px !important;
103 |     border-radius: 2px !important;
104 | }
105 | 
106 | /* Scrollbar */
107 | ::-webkit-scrollbar {
108 |     width: 8px;
109 |     height: 8px;
110 | }
111 | 
112 | ::-webkit-scrollbar-track {
113 |     background: var(--background-dark);
114 |     border-radius: 4px;
115 | }
116 | 
117 | ::-webkit-scrollbar-thumb {
118 |     background: var(--primary-color);
119 |     border-radius: 4px;
120 | }
121 | 
122 | ::-webkit-scrollbar-thumb:hover {
123 |     background: var(--secondary-color);
124 | }
125 | 
126 | /* Containers and Cards */
127 | .container, .card {
128 |     background: var(--background-light) !important;
129 |     border: 1px solid var(--border-color) !important;
130 |     border-radius: 12px !important;
131 |     padding: 1.5rem !important;
132 |     margin: 1rem 0 !important;
133 |     box-shadow: 0 4px 6px var(--shadow-color) !important;
134 | }
135 | 
136 | /* Fix for Gradio specific elements */
137 | .gr-box, .gr-form {
138 |     border-radius: 12px !important;
139 |     border: 1px solid var(--border-color) !important;
140 |     background: var(--background-light) !important;
141 | }
142 | 
143 | .gr-padded {
144 |     padding: 1.5rem !important;
145 | }
146 | 
147 | /* Responsive adjustments */
148 | @media (max-width: 640px) {
149 |     .container, .card {
150 |         padding: 1rem !important;
151 |     }
152 |     
153 |     button, .button {
154 |         width: 100% !important;
155 |     }
156 | }
157 | 
158 | /* Audio and Video elements */
159 | audio, video {
160 |     border-radius: 8px !important;
161 |     background: var(--background-dark) !important;
162 |     margin: 0.5rem 0 !important;
163 | }
164 | 
165 | /* File upload areas */
166 | .upload-box {
167 |     border: 2px dashed var(--border-color) !important;
168 |     border-radius: 12px !important;
169 |     background: var(--background-dark) !important;
170 |     padding: 2rem !important;
171 |     text-align: center !important;
172 | }
173 | 
174 | .upload-box:hover {
175 |     border-color: var(--primary-color) !important;
176 | }
177 | """


--------------------------------------------------------------------------------
/aafactory/src/aafactory/chat/interface.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | from aafactory.comfyui.video import send_request_to_generate_video
  3 | from aafactory.configuration import AVATAR_PAGE_SETTINGS_TABLE_NAME, DB_PATH, DEFAULT_AVATAR_IMAGE_PATH, VOICE_MODELS
  4 | from aafactory.database.manage_db import AVATAR_TABLE_NAME
  5 | from aafactory.fetcher.fetching import send_request_to_open_ai
  6 | from aafactory.utils.voice import send_request_to_elevenlabs, send_request_to_zonos
  7 | import gradio as gr
  8 | from PIL import Image
  9 | from string import Template
 10 | 
 11 | from tinydb import TinyDB
 12 | 
# Running transcript as a list of [user_prompt, text_response] pairs; appended
# to and cleared by send_request_to_llm. Module-level, so it is shared by every
# caller in this process.
CHAT_HISTORY = []
CURRENT_AVATAR = None # Global variable to track current avatar
# System-message template; $name, $personality and $background_knowledge are
# substituted by send_request_to_llm on every request.
SYSTEM_PROMPT = Template("""
Your name is $name.
Personality:
$personality

Background Knowledge:
$background_knowledge
""")
 23 | 
def create_chat_interface() -> gr.Blocks:
    """
    Build the chat page and return it as a gr.Blocks.

    The left column holds invisible fields carrying the active avatar's
    settings plus the video player; the right column holds the chatbot,
    the message box and the send button.
    """
    with gr.Blocks() as chat:
        with gr.Row():
            with gr.Column():
                # Invisible fields: filled by the load event below and passed
                # to send_request_to_llm as inputs when the user sends a message.
                name = gr.Textbox(label="Name", visible=False)
                personality = gr.Textbox(label="Personality", visible=False)
                background_knowledge = gr.Textbox(label="Background Knowledge", visible=False)
                voice_model = gr.Dropdown(label="Voice Model", choices=VOICE_MODELS, visible=False)
                voice_id = gr.Textbox(label="Voice ID", visible=False)
                voice_language = gr.Textbox(label="Voice Language", visible=False)
                voice_recording_path = gr.Textbox(label="Voice Recording", visible=False)
                audio_transcript = gr.Textbox(label="Audio Transcript", visible=False)
                avatar_image = gr.Textbox(value=DEFAULT_AVATAR_IMAGE_PATH, visible=False)
                # NOTE(review): the video player is initialised with an image path — confirm this is intended.
                avatar_animation = gr.Video(value=DEFAULT_AVATAR_IMAGE_PATH, autoplay=True)
            with gr.Column():
                chatbot = gr.Chatbot(placeholder="<strong>Your Personal Avatar</strong><br>Ask Me Anything")
                chatbot.like(vote, None, None)
                msg = gr.Textbox(label="Message")
                submit_btn = gr.Button("Send")
                # Sending clears the message box, refreshes the transcript and
                # swaps in the newly generated video.
                submit_btn.click(
                    fn=send_request_to_llm,
                    inputs=[avatar_image, msg, name, personality, background_knowledge, voice_model, voice_id, voice_recording_path, audio_transcript, voice_language],
                    outputs=[msg, chatbot, avatar_animation]
                )
        # On page load, populate the fields above from the database.
        # The order of `outputs` must match the tuple returned by
        # _load_avatar_infos_for_chat.
        chat.load(
            fn=_load_avatar_infos_for_chat,
            outputs=[name, personality, background_knowledge, avatar_animation, voice_model, voice_id, voice_recording_path, audio_transcript, voice_language, avatar_image]
        )
        return chat
 54 | 
 55 | 
 56 | def _load_avatar_infos_for_chat():
 57 |     db = TinyDB(DB_PATH)
 58 |     table = db.table(AVATAR_TABLE_NAME)
 59 |     avatar_page_settings_table = db.table(AVATAR_PAGE_SETTINGS_TABLE_NAME)
 60 |     avatar_page_settings = avatar_page_settings_table.get(doc_id=1)  # Changed from 0 to 1 since TinyDB starts at 1
 61 |     avatar_name = avatar_page_settings.get("avatar_name")
 62 |     avatar_info = table.get(lambda x: x.get("name") == avatar_name) 
 63 |     if avatar_info:
 64 |         return (
 65 |             avatar_info.get("name", ""),
 66 |             avatar_info.get("personality", ""),
 67 |             avatar_info.get("background_knowledge", ""),
 68 |             avatar_info.get("avatar_image_path", "") ,  # for video
 69 |             avatar_info.get("voice_model", "elevenlabs"),
 70 |             avatar_info.get("voice_id", ""),
 71 |             avatar_info.get("voice_recording_path", ""),
 72 |             avatar_info.get("audio_transcript", ""),
 73 |             avatar_info.get("voice_language", ""),
 74 |             avatar_info.get("avatar_image_path", "")  # for image path
 75 |         )
 76 |     return "", "", "", DEFAULT_AVATAR_IMAGE_PATH, "elevenlabs", "", "", "", "", ""
 77 | 
 78 | async def send_request_to_llm(avatar_image_path: str, user_prompt: str, name: str, personality: str, background_knowledge: str, voice_model: str, voice_id: str, voice_recording_path: str, audio_transcript: str, voice_language: str) -> tuple[str, list, str]:
 79 |     global CURRENT_AVATAR
 80 | 
 81 |     # Clear history if avatar changed
 82 |     if CURRENT_AVATAR != name:
 83 |         CHAT_HISTORY.clear()
 84 |         CURRENT_AVATAR = name
 85 |             
 86 |     user_message = user_prompt
 87 |     avatar_image_path = Path(avatar_image_path)
 88 |     if len(CHAT_HISTORY) > 0:
 89 |         user_message = CHAT_HISTORY[-1][0] + user_prompt
 90 |     messages = [
 91 |         {"role": "system", "content": SYSTEM_PROMPT.substitute(name=name, personality=personality, background_knowledge=background_knowledge)},
 92 |         {"role": "user", "content": user_message},
 93 |     ]
 94 |     text_response = await send_request_to_open_ai(messages)
 95 |     if voice_model == "elevenlabs":
 96 |         audio_response = await send_request_to_elevenlabs(text_response, voice_id)
 97 |     elif voice_model == "zonos":
 98 |         audio_response = await send_request_to_zonos(text_response, voice_language, voice_recording_path, audio_transcript)
 99 |     video_response = await send_request_to_generate_video(avatar_image_path, audio_response)
100 |     # Return empty message (to clear input), updated history, and animation path
101 |     CHAT_HISTORY.append([user_prompt, text_response])
102 |     return "", CHAT_HISTORY, video_response
103 | 
def vote(data: gr.LikeData):
    """
    Print whether the user upvoted or downvoted a chatbot response.
    """
    prefix = "You upvoted this response: " if data.liked else "You downvoted this response: "
    print(prefix + data.value["value"])
109 | 
110 | 
def update_avatar_image(image: Image.Image):
    """
    Store the given image in the module-level AVATAR_IMAGE global and return it unchanged.
    """
    # NOTE(review): AVATAR_IMAGE is not initialised at module level in this
    # file; it only exists after the first call to this function.
    global AVATAR_IMAGE
    AVATAR_IMAGE = image
    return image
115 | 


--------------------------------------------------------------------------------
/aafactory/src/aafactory/database/manage_db.py:
--------------------------------------------------------------------------------
  1 | import uuid
  2 | import numpy as np
  3 | import gradio as gr
  4 | import soundfile as sf
  5 | from aafactory.schemas import Settings
  6 | from loguru import logger
  7 | from tinydb import TinyDB
  8 | from PIL import Image
  9 | from pathlib import Path
 10 | from aafactory.configuration import AVATAR_PAGE_SETTINGS_TABLE_NAME, AVATAR_TABLE_NAME, DB_PATH, AVATAR_IMAGES_PATH, DEFAULT_VOICE_RECORDING_PATH, SETTINGS_TABLE_NAME, AVATAR_VOICE_RECORDINGS_PATH
 11 | 
 12 | 
 13 | def update_avatar_infos(name: str, personality: str, background_knowledge: str, avatar_image: Image.Image, voice_model: str, voice_id: str, voice_recording: bytes, audio_transcript: str, voice_language: str) -> None:
 14 |     """
 15 |     Update the avatar infos in the database.
 16 |     """
 17 |     db = TinyDB(DB_PATH)
 18 |     table = db.table(AVATAR_TABLE_NAME)
 19 |     # Check if avatar with same name already exists
 20 |     existing_avatar = table.get(lambda x: x.get('name') == name)
 21 |     if existing_avatar:
 22 |         logger.warning(f"Avatar with name '{name}' already exists")
 23 |         gr.Warning(f"Avatar with name '{name}' already exists. Please choose a different name.")
 24 |         return
 25 |     avatar_image_path = _save_avatar_image(avatar_image, AVATAR_IMAGES_PATH)
 26 |     if voice_model == "elevenlabs":
 27 |         avatar_infos = {"name": name, "personality": personality, "background_knowledge": background_knowledge, "avatar_image_path": avatar_image_path, "voice_model": voice_model, "voice_id": voice_id}
 28 |     elif voice_model == "zonos":
 29 |         voice_recording_file_path = _save_voice_recording(voice_recording)
 30 |         avatar_infos = {"name": name, "personality": personality, "background_knowledge": background_knowledge, "avatar_image_path": avatar_image_path, "voice_model": voice_model, "voice_language": voice_language, "voice_recording_path": voice_recording_file_path, "audio_transcript": audio_transcript}
 31 |     table.insert(avatar_infos)
 32 |     logger.success(f"Avatar infos updated: {avatar_infos}")
 33 |     gr.Info("Avatar infos updated",)
 34 | 
 35 | 
 36 | def _save_voice_recording(voice_recording: tuple[int, np.ndarray]) -> str:
 37 |     """
 38 |     Save the voice recording to the voice recording path.
 39 |     """
 40 |     voice_recording_file_path = AVATAR_VOICE_RECORDINGS_PATH / f"{uuid.uuid4()}.wav"
 41 |     sf.write(voice_recording_file_path, voice_recording[1], voice_recording[0])
 42 |     return voice_recording_file_path.as_posix()
 43 | 
 44 | 
 45 | def _save_avatar_image(avatar_image: Image.Image, avatar_image_folder: Path) -> str:
 46 |     """
 47 |     Save the avatar image to the avatar image path.
 48 |     """
 49 |     avatar_image_folder.mkdir(parents=True, exist_ok=True)
 50 |     avatar_image_path = avatar_image_folder / f"{uuid.uuid4()}.png"
 51 |     avatar_image.save(avatar_image_path)
 52 |     return avatar_image_path.as_posix()
 53 | 
 54 | def load_avatar_infos() -> tuple[str, str, str, Image.Image, str, str, str, str]:
 55 |     """
 56 |     Load the avatar infos from the database.
 57 |     """
 58 |     db = TinyDB(DB_PATH)
 59 |     avatar_table = db.table(AVATAR_TABLE_NAME)
 60 |     avatar_page_settings_table = db.table(AVATAR_PAGE_SETTINGS_TABLE_NAME)
 61 |     avatar_page_settings = avatar_page_settings_table.get(doc_id=1)  # Changed from 0 to 1 since TinyDB starts at 1
 62 |     avatar_name = avatar_page_settings.get("avatar_name")
 63 |     avatar_info = avatar_table.get(lambda x: x.get("name") == avatar_name)
 64 |     if avatar_page_settings.get("is_creating_new_avatar"):
 65 |         return "", "", "", None, "", "", None, "", ""
 66 |     if avatar_info:
 67 |         gr.Info(f"Current loaded avatar is {avatar_name}")
 68 |         return (
 69 |             avatar_info.get("name", ""),
 70 |             avatar_info.get("personality", ""),
 71 |             avatar_info.get("background_knowledge", ""),
 72 |             avatar_info.get("avatar_image_path", ""),
 73 |             avatar_info.get("voice_model", ""),
 74 |             avatar_info.get("voice_id", ""),
 75 |             avatar_info.get("voice_recording_path", None),
 76 |             avatar_info.get("audio_transcript", ""),
 77 |             avatar_info.get("voice_language", "")
 78 |         )
 79 |     return "", "", "", None, "", "", None, "", ""
 80 | 
 81 | def load_selected_avatar_infos(avatar_name: str) -> tuple[str, str, str, Image.Image, str, str, str, str]:
 82 |     """
 83 |     Load the avatar infos from the database.
 84 |     """
 85 |     db = TinyDB(DB_PATH)
 86 |     avatar_table = db.table(AVATAR_TABLE_NAME)
 87 |     avatar_info = avatar_table.get(lambda x: x.get("name") == avatar_name)  # Changed from 0 to 1 since TinyDB starts at 1
 88 |     if avatar_info:
 89 |         save_avatar_page_settings(False, avatar_name)
 90 |         return (
 91 |             avatar_info.get("name", ""),
 92 |             avatar_info.get("personality", ""),
 93 |             avatar_info.get("background_knowledge", ""),
 94 |             avatar_info.get("avatar_image_path", ""),
 95 |             avatar_info.get("voice_model", ""),
 96 |             avatar_info.get("voice_id", ""),
 97 |             avatar_info.get("voice_recording_path", None),
 98 |             avatar_info.get("audio_transcript", ""),
 99 |             avatar_info.get("voice_language", "")
100 |         )
101 |     return "", "", "", None, "", "", None, "", ""
102 | 
103 | 
def get_settings() -> Settings:
    """
    Read the settings row (doc_id 1) from the settings table and return it
    as a Settings object.
    """
    database = TinyDB(DB_PATH)
    stored_settings = database.table(SETTINGS_TABLE_NAME).get(doc_id=1)
    return Settings(**stored_settings)
111 | 
def save_avatar_page_settings(is_creating_new_avatar: bool, avatar_name: str | None = None) -> None:
    """
    Replace the avatar page settings with a single fresh row.

    The row stores whether a new avatar is being created and the name of
    the selected avatar (None when no name is given).
    """
    db = TinyDB(DB_PATH)
    # The table handle is obtained before the drop and reused for the insert.
    table = db.table(AVATAR_PAGE_SETTINGS_TABLE_NAME)
    # Drop any previous content so the table only ever holds the new row.
    # NOTE(review): readers fetch this row with doc_id=1 (see load_avatar_infos);
    # this presumably relies on TinyDB restarting ids after the drop — confirm.
    if AVATAR_PAGE_SETTINGS_TABLE_NAME in db.tables():
        db.drop_table(AVATAR_PAGE_SETTINGS_TABLE_NAME)
    table.insert({"is_creating_new_avatar": is_creating_new_avatar, "avatar_name": avatar_name})
    logger.success(f"Avatar page settings updated: {is_creating_new_avatar} {avatar_name}")
122 | 
123 | 
def get_available_avatars() -> list[str]:
    """
    Return the "name" field of every record in the avatar table.
    """
    database = TinyDB(DB_PATH)
    names = []
    for record in database.table(AVATAR_TABLE_NAME).all():
        names.append(record.get("name"))
    return names


--------------------------------------------------------------------------------
/aafactory/src/aafactory/avatar/interface.py:
--------------------------------------------------------------------------------
 1 | from aafactory.database.manage_db import get_available_avatars, load_avatar_infos, load_selected_avatar_infos, save_avatar_page_settings, update_avatar_infos
 2 | import gradio as gr
 3 | from aafactory.configuration import VOICE_LANGUAGES, VOICE_MODELS
 4 | 
def create_avatar_setup_interface() -> None:
    """
    Create the avatar setup interface.

    Lays out two entry buttons ("Create New Avatar" / "Load Existing Avatar"),
    an accordion with the avatar's description fields, an accordion with the
    voice settings, and wires the events that load, save and adapt them.
    """
    # These containers/components are created up front so the button handlers
    # below can list them as outputs before their contents are declared.
    # They start hidden and are made visible by the handlers.
    avatar_setup = gr.Row()
    available_avatars = gr.Dropdown(choices=[], visible=False)
    avatar_infos = gr.Accordion(visible=False)
    voice_settings = gr.Accordion(visible=False)
    submit_btn = gr.Button("Save Avatar Infos", visible=False)
    # NOTE(review): the Blocks object is not returned, unlike the chat page
    # builder — presumably this is called inside an outer Blocks context; confirm.
    with gr.Blocks() as define_avatar:
        with avatar_setup:
            with gr.Column() as create_avatar:
                create_avatar_btn = gr.Button("Create New Avatar")
                create_avatar_btn.click(
                    fn=_create_avatar_infos,
                    inputs=[],
                    outputs=[avatar_infos, voice_settings, submit_btn]
                )
            with gr.Column() as load_avatar:
                load_avatar_btn = gr.Button("Load Existing Avatar")
                load_avatar_btn.click(
                    fn=_load_available_avatars,
                    inputs=[],
                    outputs=[available_avatars, avatar_infos, voice_settings, submit_btn]
                )
        
        # Description fields of the avatar.
        with avatar_infos:
            gr.Markdown("### Name")
            name = gr.Textbox(show_label=False, info="Enter the name of your avatar")
            gr.Markdown("### Personality")
            personality = gr.TextArea(show_label=False, info="Enter the personality of your avatar")
            gr.Markdown("### Background Knowledge")
            background_knowledge = gr.TextArea(show_label=False, info="Enter the background knowledge of your avatar")
            gr.Markdown("### Avatar Image")
            avatar_image = gr.Image(sources=["upload"], type="pil", show_label=False, show_download_button=False, show_fullscreen_button=False)
        # Voice fields; all but the model selector start hidden and are
        # toggled by _adapt_ui_to_voice_model.
        with voice_settings:
            gr.Markdown("### Voice Model")
            voice_model = gr.Dropdown(choices=VOICE_MODELS, value="elevenlabs", interactive=True, info="Select the voice model you want to use")
            voice_id = gr.Textbox(show_label=False, visible=False, interactive=True, info="Enter the voice id you want to use")
            voice_recording = gr.Audio(show_label=False, visible=False, interactive=True, label="Upload a voice sample")
            audio_transcript = gr.TextArea(show_label=False, visible=False, interactive=True, info="Enter the audio transcript you want to use")
            voice_language = gr.Dropdown(choices=VOICE_LANGUAGES, value="en-us", visible=False, interactive=True, info="Select the voice language you want to use")
        # Picking an avatar in the dropdown fills every field from the database.
        available_avatars.change(
            fn=load_selected_avatar_infos,
            inputs=[available_avatars],
            outputs=[name, personality, background_knowledge, avatar_image, voice_model, voice_id, voice_recording, audio_transcript, voice_language]
        )
        # Saving passes every field to update_avatar_infos; it has no outputs.
        submit_btn.click(
            fn=update_avatar_infos,
            inputs=[name, personality, background_knowledge, avatar_image, voice_model, voice_id, voice_recording, audio_transcript, voice_language]
        )
        # Show only the inputs relevant to the selected voice model.
        voice_model.change(
            fn=_adapt_ui_to_voice_model,
            inputs=[voice_model],
            outputs=[voice_id, voice_recording, audio_transcript, voice_language]
        )
        # On page load, restore the fields of the currently selected avatar.
        define_avatar.load(
            fn=load_avatar_infos,
            inputs=[],
            outputs=[name, personality, background_knowledge, avatar_image, voice_model, voice_id, voice_recording, audio_transcript, voice_language]
        )
67 | 
def _load_available_avatars() -> tuple[gr.Dropdown, gr.Accordion, gr.Accordion, gr.Button]:
    """
    Switch the page to "load existing avatar" mode.

    Records that no new avatar is being created, then returns a visible
    dropdown listing the stored avatars together with the visible info
    accordion, voice accordion and save button.
    """
    save_avatar_page_settings(False)
    avatar_names = get_available_avatars()
    avatar_picker = gr.Dropdown(choices=avatar_names, interactive=True, visible=True)
    infos_panel = gr.Accordion("Avatar Infos", visible=True)
    voice_panel = gr.Accordion("Voice Settings", visible=True)
    save_button = gr.Button("Save Avatar Infos", visible=True)
    return avatar_picker, infos_panel, voice_panel, save_button
75 | 
76 | 
def _adapt_ui_to_voice_model(voice_model: str) -> tuple[gr.Textbox, gr.Audio, gr.TextArea, gr.Dropdown]:
    """
    Adapt the voice settings inputs to the selected voice model.

    For "elevenlabs" (and for an empty selection) only the voice id box is
    visible; for "zonos" the voice sample, transcript and language inputs
    are visible instead.

    Returns the updated (voice id, voice recording, audio transcript,
    voice language) components, in that order.

    Raises:
        ValueError: if voice_model is not "elevenlabs", "zonos" or "".
    """
    if voice_model not in ("elevenlabs", "zonos", ""):
        raise ValueError(f"Voice model {voice_model} not supported")
    # The components are identical across models apart from their visibility.
    use_zonos = voice_model == "zonos"
    return (
        gr.Textbox(show_label=False, visible=not use_zonos, interactive=True, info="Enter the voice id you want to use"),
        gr.Audio(show_label=False, visible=use_zonos, interactive=True, label="Upload a voice sample"),
        gr.TextArea(show_label=False, visible=use_zonos, interactive=True, info="Enter the audio transcript you want to use"),
        gr.Dropdown(choices=VOICE_LANGUAGES, value="en-us", visible=use_zonos, interactive=True, info="Select the voice language you want to use"),
    )
89 | 
def _create_avatar_infos() -> tuple[gr.Accordion, gr.Accordion, gr.Button]:
    """
    Switch the page to "create new avatar" mode.

    Records that a new avatar is being created, notifies the user, and
    returns the visible info accordion, voice accordion and save button.
    """
    save_avatar_page_settings(True)
    gr.Info("Creating new avatar. Please reload the page to empty the fields.",)
    infos_panel = gr.Accordion("Avatar Infos", visible=True)
    voice_panel = gr.Accordion("Voice Settings", visible=True)
    save_button = gr.Button("Save Avatar Infos", visible=True)
    return infos_panel, voice_panel, save_button
97 | 
98 | 


--------------------------------------------------------------------------------
/aafactory/src/aafactory/comfyui/video.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from pathlib import Path
  3 | import time
  4 | import uuid
  5 | from aafactory.configuration import GENERATED_VIDEO_PATH, WORKFLOW_FOLDER
  6 | from aafactory.schemas import Settings
  7 | from loguru import logger
  8 | from pydantic import BaseModel
  9 | import requests
 10 | import soundfile as sf
 11 | from aafactory.database.manage_db import get_settings
 12 | 
 13 | 
class QueueHistory(BaseModel):
    """
    Result of a queued workflow: the prompt id together with the history
    payload fetched for it.
    """
    # Id returned by the server when the workflow was queued.
    prompt_id: str
    # JSON body returned by the history endpoint for that prompt id.
    response: dict
 17 | 
 18 | 
 19 | async def send_request_to_generate_video(avatar_image_path: Path, audio_file_path: Path) -> Path:
 20 |     """
 21 |     Send a request to the server to generate a video.
 22 |     """
 23 |     settings = get_settings()
 24 |     avatar_image_path = Path(avatar_image_path)
 25 |     upload_files_to_comfyui_server([avatar_image_path, audio_file_path])
 26 |     workflow = _create_workflow(avatar_image_path, audio_file_path)
 27 |     queue_history = await queue_task(workflow, settings)
 28 |     video_url = _get_video_url(settings, queue_history)
 29 |     output_path = _save_video_to_file(video_url)
 30 |     return output_path
 31 | 
 32 | 
 33 | def upload_files_to_comfyui_server(files: list[Path]) -> None:
 34 |     """
 35 |     Upload files to the server.
 36 |     """
 37 |     settings = get_settings()
 38 |     comfy_server_url = settings.comfy_server_url
 39 |     logger.info(f"Uploading Files to ComfyUI Server at {comfy_server_url} ...")
 40 |     for file in files:
 41 |         with open(file, "rb") as f:
 42 |             to_upload_files = {
 43 |                 'image': (file.name, f, 'image/' + file.suffix[1:])
 44 |             }
 45 |             response =requests.post(f"{comfy_server_url}/upload/image", files=to_upload_files)
 46 |             if response.status_code == 200:
 47 |                 logger.success(f"Uploaded {file.name}")
 48 |             else:
 49 |                 logger.error(f"Failed to upload {file.name}. Status code: {response.status_code}")
 50 | 
 51 | 
 52 | def _create_workflow(avatar_image_path: Path, audio_file_path: Path) -> dict:
 53 |     """
 54 |     Create a workflow for the video generation.
 55 |     """
 56 |     with open(Path(WORKFLOW_FOLDER, "audio_image_to_video_with_sonic.json"), "r") as f:
 57 |         workflow = json.load(f)
 58 |     workflow["6"]["inputs"]["duration"] = _get_audio_file_duration(audio_file_path)
 59 |     workflow["7"]["inputs"]["image"] = avatar_image_path.name
 60 |     workflow["9"]["inputs"]["audio"] = audio_file_path.name
 61 |     return {"prompt": workflow}
 62 | 
 63 | 
 64 | def _get_audio_file_duration(audio_file_path: Path) -> int:
 65 |     """
 66 |     Get the duration of the audio file.
 67 |     """
 68 |     info = sf.info(str(audio_file_path))
 69 |     return round(info.duration, 2) + 2
 70 | 
 71 | async def queue_task(workflow: dict, settings: Settings) -> QueueHistory:
 72 |     """
 73 |     Queue a task to the server.
 74 |     """
 75 |     response1 = _queue_prompt(workflow, settings)
 76 |     if response1 is None:
 77 |         logger.error("Failed to queue the prompt.")
 78 |         return
 79 | 
 80 |     prompt_id = response1['prompt_id']
 81 |     logger.info(f'Prompt ID: {prompt_id}')
 82 |     logger.info('-' * 20)
 83 |     while True:
 84 |         time.sleep(5)
 85 |         queue_response = _get_queue(settings.comfy_server_url)
 86 |         if queue_response is None:
 87 |             continue
 88 | 
 89 |         queue_pending = queue_response.get('queue_pending', [])
 90 |         queue_running = queue_response.get('queue_running', [])
 91 | 
 92 |         # Check position in queue
 93 |         for position, item in enumerate(queue_pending):
 94 |             if item[1] == prompt_id:
 95 |                 logger.info(f'Queue running: {len(queue_running)}, Queue pending: {len(queue_pending)}, Workflow is in position {position + 1} in the queue.')
 96 | 
 97 |         # Check if the prompt is currently running
 98 |         for item in queue_running:
 99 |             if item[1] == prompt_id:
100 |                 logger.info(f'Queue running: {len(queue_running)}, Queue pending: {len(queue_pending)}, Workflow is currently running.')
101 |                 break
102 | 
103 |         if not any(prompt_id in item for item in queue_pending + queue_running):
104 |             break
105 |     
106 |     response = _get_history(settings.comfy_server_url, prompt_id)
107 |     return QueueHistory(prompt_id=prompt_id, response=response)
108 | 
109 | 
def _queue_prompt(prompt: dict, settings: Settings) -> dict | None:
    """
    POST the workflow payload to the server's /prompt endpoint.

    Returns the decoded JSON response, or None when the request fails
    (the failure is logged).
    """
    prompt_url = f"{settings.comfy_server_url}/prompt"
    payload = json.dumps(prompt).encode('utf-8')
    try:
        r = requests.post(prompt_url, data=payload, headers={"Content-Type": "application/json"})
        r.raise_for_status()
        return r.json()
    except requests.exceptions.RequestException as ex:
        logger.error(f'POST {prompt_url} failed: {ex}')
        return None
120 |     
121 | 
def _get_queue(url: str) -> dict | None:
    """
    GET the server's /queue endpoint.

    Returns the decoded JSON response, or None when the request fails
    (the failure is logged).
    """
    queue_url = f"{url}/queue"
    try:
        r = requests.get(queue_url)
        r.raise_for_status()
        return r.json()
    except requests.exceptions.RequestException as ex:
        # Logged through the module logger, like _queue_prompt does.
        logger.error(f'GET {queue_url} failed: {ex}')
        return None
131 |     
132 | 
def _get_history(url: str, prompt_id: str) -> dict | None:
    """
    GET the server's /history/<prompt_id> endpoint.

    Returns the decoded JSON response, or None when the request fails
    (the failure is logged).
    """
    history_url = f"{url}/history/{prompt_id}"
    try:
        r = requests.get(history_url)
        r.raise_for_status()
        return r.json()
    except requests.exceptions.RequestException as ex:
        # Logged through the module logger, like _queue_prompt does.
        logger.error(f'GET {history_url} failed: {ex}')
        return None
142 | 
143 | 
def _get_video_url(settings: Settings, queue_history: QueueHistory) -> str:
    """
    Build the download URL of the generated video.

    The file name is read from the first "gifs" entry of output node "8"
    in the history entry for this prompt; when any level is missing the
    file name falls back to 'unknown.png'.
    """
    history_entry = queue_history.response.get(queue_history.prompt_id, {})
    node_output = history_entry.get('outputs', {}).get('8', {})
    first_clip = node_output.get('gifs', [{}])[0]
    filename = first_clip.get('filename', 'unknown.png')
    output_url = f"{settings.comfy_server_url}/api/viewvideo?filename={filename}"
    logger.success(f"Output URL: {output_url}")
    return output_url
153 | 
def _save_video_to_file(video_url: str) -> Path:
    """
    Download the video and save it as a uniquely named .mp4 file.

    Returns the path of the saved file.

    Raises:
        requests.exceptions.HTTPError: if the server answers with an error
            status, so an error page is never written out as a video.
    """
    output_path = GENERATED_VIDEO_PATH / f"{uuid.uuid4().hex}.mp4"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    response = requests.get(video_url)
    # Fail before touching the disk when the download did not succeed.
    response.raise_for_status()
    with open(output_path, "wb") as f:
        f.write(response.content)
    logger.success(f"Video saved to {output_path}")
    return output_path


--------------------------------------------------------------------------------
/github_assets/hpi-logo-white.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" width="300" height="132.23" viewBox="0 0 500 232.23">
 2 |   <defs>
 3 |     <style>
 4 |       .cls-1 {
 5 |         fill: #fff;
 6 |       }
 7 |     </style>
 8 |   </defs>
 9 |   <g id="Gruppe_205702" data-name="Gruppe 205702" transform="translate(-3 0.001)">
10 |     <g id="Gruppe_207561" data-name="Gruppe 207561" transform="translate(3 -0.001)">
11 |       <path id="Pfad_132769" data-name="Pfad 132769" class="cls-1" d="M20.574,171.568H192.145V0H20.574Zm58.094-71.4a1.351,1.351,0,0,1-1.348,1.348h-6.67a1.353,1.353,0,0,1-1.351-1.348V79.541H50.156v20.627a1.352,1.352,0,0,1-1.348,1.348H42.135a1.352,1.352,0,0,1-1.348-1.348V52.713a1.3,1.3,0,0,1,1.348-1.281h6.673a1.3,1.3,0,0,1,1.348,1.281V71.318H69.3V52.713a1.3,1.3,0,0,1,1.351-1.281h6.67a1.3,1.3,0,0,1,1.348,1.281Zm45.007-33.1c0,11.929-9.031,15.772-19.413,15.772-1.754,0-4.45-.2-6-.268v17.592a1.3,1.3,0,0,1-1.281,1.348H90.244a1.353,1.353,0,0,1-1.351-1.348V53.793c0-1.348.808-1.818,2.156-2.022a92.036,92.036,0,0,1,13.213-.875c10.382,0,19.413,3.639,19.413,15.57Zm17.172,33.1a1.352,1.352,0,0,1-1.345,1.348h-6.676a1.352,1.352,0,0,1-1.345-1.348V52.713a1.3,1.3,0,0,1,1.345-1.281H139.5a1.3,1.3,0,0,1,1.345,1.281Z" transform="translate(39.46)"/>
12 |       <path id="Pfad_132770" data-name="Pfad 132770" class="cls-1" d="M53.2,20.1c-1.415,0-4.785.134-6,.268V36.409c1.08.067,4.853.2,6,.2,7.28,0,10.111-2.7,10.111-8.092v-.6c0-5.664-3.37-7.82-10.111-7.82" transform="translate(90.524 38.547)"/>
13 |       <path id="Pfad_132771" data-name="Pfad 132771" class="cls-1" d="M56.034,9.921H0V212.568H202.651V156.537H56.034Z" transform="translate(0 19.027)"/>
14 |       <path id="Pfad_132772" data-name="Pfad 132772" class="cls-1" d="M88.8,67.519h6.673a1.355,1.355,0,0,0,1.351-1.348V45.547h19.142V66.171a1.352,1.352,0,0,0,1.348,1.348h6.673a1.352,1.352,0,0,0,1.348-1.348V18.719a1.3,1.3,0,0,0-1.348-1.281h-6.673a1.3,1.3,0,0,0-1.348,1.281V37.324H96.825V18.719a1.3,1.3,0,0,0-1.351-1.281H88.8a1.3,1.3,0,0,0-1.348,1.281V66.171A1.352,1.352,0,0,0,88.8,67.519m56.215.54a18.27,18.27,0,0,0,9.507-2.965v1.077a1.352,1.352,0,0,0,1.348,1.348H161.6a1.351,1.351,0,0,0,1.348-1.348V41.905c0-8.626-5.258-12.874-14.829-12.874a58.061,58.061,0,0,0-11.255,1.08c-.878.2-1.281.674-1.281,1.415v4.313c0,.945.54,1.147,1.214,1.147h.338a106.589,106.589,0,0,1,10.983-.54c3.977,0,5.932,1.08,5.932,5.46v2.968h-4.243c-11.73,0-16.452,3.977-16.452,11.66V56.8c0,9.1,6.069,11.258,11.66,11.258m-2.69-11.526c0-3.773,1.345-5.728,7.482-5.728h4.243V59.1a14.746,14.746,0,0,1-6.872,1.885c-3.373,0-4.853-.875-4.853-4.179Zm33.7-8.089,9.232,5.664c2.156,1.348,2.968,2.156,2.968,3.437,0,1.821-.607,3.233-4.786,3.233-2.022,0-8.092-.336-11.325-.741h-.268a1.125,1.125,0,0,0-1.147,1.08v4.044a1.523,1.523,0,0,0,1.281,1.549,55.714,55.714,0,0,0,12.066,1.348c8.83,0,13.21-4.651,13.21-10.513,0-3.437-.805-6.811-6.469-10.245l-8.833-5.4c-1.684-1.01-2.693-1.684-2.693-2.763,0-2.089,1.412-2.83,4.648-2.83a99.634,99.634,0,0,1,11.123.808h.268c.674,0,1.08-.47,1.08-1.281V31.932a1.47,1.47,0,0,0-1.214-1.552,49.961,49.961,0,0,0-12-1.348c-8.558,0-12.807,3.37-12.807,10.111a11.1,11.1,0,0,0,5.664,9.3M223.745,47.3l-8.827-5.4c-1.687-1.01-2.7-1.684-2.7-2.763,0-2.089,1.415-2.83,4.648-2.83a99.573,99.573,0,0,1,11.12.808h.271c.674,0,1.08-.47,1.08-1.281V31.932a1.47,1.47,0,0,0-1.214-1.552,49.98,49.98,0,0,0-12-1.348c-8.561,0-12.807,3.37-12.807,10.111a11.109,11.109,0,0,0,5.664,9.3l9.232,5.664c2.159,1.348,2.965,2.156,2.965,3.437,0,1.821-.607,3.233-4.783,3.233-2.025,0-8.089-.336-11.325-.741h-.269a1.128,1.128,0,0,0-1.15,1.08v4.044a1.526,1.526,0,0,0,1.281,1.549A55.734,55.734,0,0,0,217,68.059c8.833,0,13.213-4.651,13.213-10.513,0-3.437-.
808-6.811-6.472-10.245m28.85,20.758c10.584,0,16.11-7.143,16.11-16.784v-5.46c0-9.638-5.527-16.784-16.11-16.784s-16.107,7.146-16.107,16.784v5.46c0,9.641,5.527,16.784,16.107,16.784m-7.21-22.244c0-5.527,2.492-9.1,7.21-9.1s7.213,3.572,7.213,9.1v5.46c0,5.527-2.495,9.1-7.213,9.1s-7.21-3.575-7.21-9.1ZM122.237,98.438v-.607c0-11.929-9.034-15.57-19.413-15.57a91.937,91.937,0,0,0-13.213.878c-1.348.2-2.159.674-2.159,2.022v46.372a1.352,1.352,0,0,0,1.348,1.348h6.743a1.3,1.3,0,0,0,1.281-1.348V113.941c1.549.067,4.246.271,6,.271,10.379,0,19.413-3.843,19.413-15.775m-9.305,0c0,5.392-2.83,8.092-10.108,8.092-1.15,0-4.923-.137-6-.2V90.282c1.211-.134,4.581-.271,6-.271,6.738,0,10.108,2.159,10.108,7.82Zm17.257,34.444h6.268a1.356,1.356,0,0,0,1.281-1.415V82.935a1.3,1.3,0,0,0-1.281-1.348h-6.268a1.353,1.353,0,0,0-1.351,1.348v48.532a1.412,1.412,0,0,0,1.351,1.415m35.923-2.425v1.077a1.353,1.353,0,0,0,1.351,1.348h5.728a1.351,1.351,0,0,0,1.348-1.348V107.271c0-8.628-5.258-12.877-14.826-12.877a58.278,58.278,0,0,0-11.258,1.08c-.878.2-1.278.674-1.278,1.415V101.2c0,.945.537,1.147,1.211,1.147h.336a106.765,106.765,0,0,1,10.989-.54c3.977,0,5.929,1.077,5.929,5.462v2.965H161.4c-11.727,0-16.446,3.977-16.446,11.66v.271c0,9.1,6.066,11.255,11.66,11.255a18.242,18.242,0,0,0,9.5-2.965m-12.2-8.29V121.9c0-3.773,1.348-5.728,7.482-5.728h4.246v8.29a14.738,14.738,0,0,1-6.875,1.885c-3.37,0-4.853-.875-4.853-4.176m32.352.54c0,9.031,3.1,10.715,10.718,10.715a27.351,27.351,0,0,0,4.584-.473c.875-.134,1.281-.537,1.281-1.211v-4.246c0-.677-.537-1.013-1.415-1.013h-3.575c-2.425,0-2.626-.4-2.626-3.773V101.54h4.126a1.355,1.355,0,0,0,1.351-1.348V96.215a1.353,1.353,0,0,0-1.351-1.348h-4.126V87.925a.974.974,0,0,0-1.077-1.013h-.271l-6.271,1.08a1.448,1.448,0,0,0-1.348,1.345v5.53h-6.332a1.353,1.353,0,0,0-1.351,1.348v3.032a1.452,1.452,0,0,0,1.351,1.351l6.332.943Zm40.108,3.773H222.8c-2.428,0-2.629-.4-2.629-3.773V101.54h6.066a1.354,1.354,0,0,0,1.348-1.348V96.215a1.352,1.352,0,0,0-1.348-1.348h-6.066V87.925a.974.974,0,0,0-1.08-1.013h-.268l-6.268,1.0
8a1.446,1.446,0,0,0-1.348,1.345v5.53h-4.386a1.352,1.352,0,0,0-1.348,1.348v3.032a1.451,1.451,0,0,0,1.348,1.351l4.386.943v21.167c0,9.031,3.1,10.715,10.715,10.715a27.352,27.352,0,0,0,4.584-.473c.875-.134,1.281-.537,1.281-1.211v-4.246c0-.677-.537-1.013-1.415-1.013m39.836,4.987V109.491c0-8.561-1.955-15.1-11.66-15.1a17.464,17.464,0,0,0-10.919,3.236V96.215a1.353,1.353,0,0,0-1.351-1.348h-5.862a1.352,1.352,0,0,0-1.348,1.348v35.252a1.412,1.412,0,0,0,1.348,1.415h6.2a1.412,1.412,0,0,0,1.348-1.415v-27.1c2.83-1.485,5.328-2.425,7.616-2.425,5.191,0,5.731,1.684,5.731,7.546v21.975a1.412,1.412,0,0,0,1.348,1.415h6.2a1.411,1.411,0,0,0,1.345-1.415m38.015-14.491c.945,0,1.348-.607,1.348-1.687v-3.773c0-10.044-5.661-17.123-16.04-17.123-10.315,0-16.177,6.606-16.177,17.123V116.5c0,13.28,9.571,16.918,16.311,16.918a59.841,59.841,0,0,0,13.078-1.147c1.211-.2,1.412-.674,1.412-1.751v-4.447c0-.674-.54-1.013-1.214-1.013h-.2c-2.833.271-7.482.674-12.4.674-6.472,0-8.089-4.651-8.089-8.561v-.2Zm-21.9-6.606c0-5.325,3.029-8.29,7.21-8.29s7.079,3.032,7.079,8.29v.271H282.323Zm48.6-15.976a15.687,15.687,0,0,0-9.644,3.236V96.215a1.352,1.352,0,0,0-1.345-1.348h-5.862a1.353,1.353,0,0,0-1.351,1.348v35.252a1.412,1.412,0,0,0,1.351,1.415h6.2a1.409,1.409,0,0,0,1.345-1.415v-27.1a19.084,19.084,0,0,1,9.308-2.425,1.356,1.356,0,0,0,1.348-1.348V95.742a1.358,1.358,0,0,0-1.348-1.348M95.6,148.163H88.867a1.3,1.3,0,0,0-1.36,1.278V196.9a1.357,1.357,0,0,0,1.36,1.351H95.6a1.36,1.36,0,0,0,1.363-1.351V149.441a1.3,1.3,0,0,0-1.363-1.278m32.611,11.593a17.78,17.78,0,0,0-11.03,3.233v-1.415a1.359,1.359,0,0,0-1.363-1.345H109.9a1.358,1.358,0,0,0-1.36,1.345v35.255a1.42,1.42,0,0,0,1.36,1.418h6.265a1.418,1.418,0,0,0,1.36-1.418v-27.1c2.863-1.485,5.381-2.425,7.695-2.425,5.241,0,5.789,1.684,5.789,7.546v21.975a1.42,1.42,0,0,0,1.36,1.418h6.262a1.421,1.421,0,0,0,1.363-1.418V174.854c0-8.561-1.973-15.1-11.777-15.1m41.937,18.267-8.92-5.392c-1.7-1.013-2.725-1.687-2.725-2.763,0-2.089,1.433-2.83,4.7-2.83a100.812,100.812,0,0,1,11.231.808h.271c.68,0,1.091-.47,1.
091-1.281v-3.907a1.475,1.475,0,0,0-1.226-1.552,50.813,50.813,0,0,0-12.121-1.348c-8.64,0-12.935,3.37-12.935,10.111a11.108,11.108,0,0,0,5.719,9.305l9.329,5.661c2.18,1.348,2.994,2.153,2.994,3.437,0,1.818-.613,3.236-4.835,3.236-2.04,0-8.167-.338-11.433-.741h-.274a1.13,1.13,0,0,0-1.158,1.077v4.044a1.514,1.514,0,0,0,1.293,1.544,56.526,56.526,0,0,0,12.191,1.354c8.914,0,13.341-4.651,13.341-10.513,0-3.437-.817-6.808-6.533-10.248m35.4,13.82h-3.61c-2.451,0-2.655-.4-2.655-3.776V166.9h6.128a1.359,1.359,0,0,0,1.363-1.348v-3.977a1.359,1.359,0,0,0-1.363-1.345h-6.128v-6.945a.978.978,0,0,0-1.091-1.01h-.268l-6.332,1.077a1.458,1.458,0,0,0-1.363,1.348v5.53h-6.4a1.361,1.361,0,0,0-1.363,1.345v3.035a1.46,1.46,0,0,0,1.363,1.351l6.4.94v21.167c0,9.028,3.131,10.718,10.826,10.718a28.158,28.158,0,0,0,4.628-.47c.887-.137,1.293-.54,1.293-1.22v-4.243c0-.674-.543-1.01-1.427-1.01m17.426-31.613h-6.262a1.359,1.359,0,0,0-1.363,1.345v35.255a1.421,1.421,0,0,0,1.363,1.418h6.262a1.42,1.42,0,0,0,1.36-1.418V161.574a1.358,1.358,0,0,0-1.36-1.345m.2-13.682h-6.606a1.421,1.421,0,0,0-1.43,1.348v5.929a1.421,1.421,0,0,0,1.43,1.348h6.606a1.357,1.357,0,0,0,1.36-1.348v-5.929a1.357,1.357,0,0,0-1.36-1.348m32.27,45.3h-3.607c-2.451,0-2.658-.4-2.658-3.776V166.9h6.128a1.361,1.361,0,0,0,1.363-1.348v-3.977a1.361,1.361,0,0,0-1.363-1.345H249.19v-6.945a.977.977,0,0,0-1.088-1.01h-.274l-6.329,1.077a1.458,1.458,0,0,0-1.363,1.348v5.53h-6.4a1.359,1.359,0,0,0-1.363,1.345v3.035a1.458,1.458,0,0,0,1.363,1.351l6.4.94v21.167c0,9.028,3.134,10.718,10.823,10.718a28.226,28.226,0,0,0,4.634-.47c.884-.137,1.293-.54,1.293-1.22v-4.243c0-.674-.546-1.01-1.43-1.01m39.687-31.613H288.88a1.413,1.413,0,0,0-1.36,1.412v26.965c-1.908,1.01-5.177,2.7-7.695,2.7a5.945,5.945,0,0,1-4.222-1.214c-1.29-1.147-1.567-3.1-1.567-6.469V161.641a1.413,1.413,0,0,0-1.36-1.412h-6.262a1.415,1.415,0,0,0-1.363,1.412V183.62c0,5.594.887,9.775,3.405,12.267,1.975,1.955,4.969,2.825,8.375,2.825a17.687,17.687,0,0,0,10.689-3.362V196.9a1.357,1.357,0,0,0,1.36,1.351h6.262A1.36,1.36,0,0,0,296.5
,196.9V161.641a1.415,1.415,0,0,0-1.363-1.412m32.27,31.613h-3.607c-2.454,0-2.655-.4-2.655-3.776V166.9h6.128a1.357,1.357,0,0,0,1.36-1.348v-3.977a1.356,1.356,0,0,0-1.36-1.345H321.15v-6.945a.979.979,0,0,0-1.088-1.01h-.271l-6.332,1.077A1.458,1.458,0,0,0,312.1,154.7v5.53h-6.4a1.359,1.359,0,0,0-1.363,1.345v3.035a1.458,1.458,0,0,0,1.363,1.351l6.4.94v21.167c0,9.028,3.131,10.718,10.826,10.718a28.221,28.221,0,0,0,4.628-.47c.884-.137,1.293-.54,1.293-1.22v-4.243c0-.674-.543-1.01-1.43-1.01" transform="translate(167.73 33.446)"/>
15 |     </g>
16 |   </g>
17 | </svg>
18 | 


--------------------------------------------------------------------------------
/cloud_setup/runpod_templates/comfy_ui/2025-04-26_11-26-04_snapshot.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "comfyui": "75c1c757d90ca891eff823893248ef8b51d31d01",
  3 |     "git_custom_nodes": {
  4 |         "https://github.com/ShmuelRonen/ComfyUI-LatentSyncWrapper": {
  5 |             "hash": "920c15ea8803f1b9cfdd504630338a21330c0eb5",
  6 |             "disabled": false
  7 |         },
  8 |         "https://github.com/Comfy-Org/ComfyUI-Manager.git": {
  9 |             "hash": "50fc1389b0184d93765aefbbc4186fcdac96bdf9",
 10 |             "disabled": false
 11 |         },
 12 |         "https://github.com/kijai/ComfyUI-WanVideoWrapper.git": {
 13 |             "hash": "f2bc29b931983e279d25452d284b3888c8c81346",
 14 |             "disabled": false
 15 |         },
 16 |         "https://github.com/smthemex/ComfyUI_Sonic": {
 17 |             "hash": "6595df02e72406fe23baebc12ac2088832d7ff9b",
 18 |             "disabled": false
 19 |         },
 20 |         "https://github.com/kijai/ComfyUI-KJNodes": {
 21 |             "hash": "b7e5b6f1e2b7c79b3f1e4b4bfe5e1687715803ab",
 22 |             "disabled": false
 23 |         }
 24 |     },
 25 |     "cnr_custom_nodes": {
 26 |         "comfyui-depthanythingv2": "1.0.0",
 27 |         "comfyui-frame-interpolation": "1.0.6",
 28 |         "comfyui-impact-pack": "8.8.0",
 29 |         "comfyui-videohelpersuite": "1.3.1",
 30 |         "comfyui_essentials": "1.1.0",
 31 |         "rgthree-comfy": "1.0.0"
 32 |     },
 33 |     "file_custom_nodes": [
 34 |         {
 35 |             "filename": "websocket_image_save.py",
 36 |             "disabled": false
 37 |         }
 38 |     ],
 39 |     "pips": {
 40 |         "accelerate==1.6.0": "",
 41 |         "aiohappyeyeballs==2.6.1": "",
 42 |         "aiohttp==3.11.16": "",
 43 |         "aiosignal==1.3.1": "",
 44 |         "antlr4-python3-runtime==4.9.3": "",
 45 |         "anyio==4.2.0": "",
 46 |         "argon2-cffi==23.1.0": "",
 47 |         "argon2-cffi-bindings==21.2.0": "",
 48 |         "arrow==1.3.0": "",
 49 |         "asttokens==2.4.1": "",
 50 |         "async-lru==2.0.4": "",
 51 |         "async-timeout==4.0.3": "",
 52 |         "attrs==23.2.0": "",
 53 |         "audioread==3.0.1": "",
 54 |         "av==14.3.0": "",
 55 |         "Babel==2.14.0": "",
 56 |         "bash_kernel==0.9.3": "",
 57 |         "beautifulsoup4==4.12.3": "",
 58 |         "bleach==6.1.0": "",
 59 |         "build==1.2.1": "",
 60 |         "CacheControl==0.14.0": "",
 61 |         "certifi==2022.12.7": "",
 62 |         "cffi==1.17.0": "",
 63 |         "chardet==5.2.0": "",
 64 |         "charset-normalizer==2.1.1": "",
 65 |         "cleo==2.1.0": "",
 66 |         "click==8.1.8": "",
 67 |         "cm-jupyter-eg-kernel-wlm==4.0.1": "",
 68 |         "cmake==3.25.0": "",
 69 |         "color-matcher==0.6.0": "",
 70 |         "colorama==0.4.6": "",
 71 |         "comfyui_frontend_package==1.14.5": "",
 72 |         "comm==0.2.2": "",
 73 |         "contourpy==1.3.2": "",
 74 |         "crashtest==0.4.1": "",
 75 |         "cryptography==43.0.0": "",
 76 |         "csvw==3.5.1": "",
 77 |         "cycler==0.12.1": "",
 78 |         "ddt==1.7.2": "",
 79 |         "debugpy==1.8.5": "",
 80 |         "decorator==5.1.1": "",
 81 |         "defusedxml==0.7.1": "",
 82 |         "Deprecated==1.2.18": "",
 83 |         "diffusers==0.32.2": "",
 84 |         "distlib==0.3.8": "",
 85 |         "dlinfo==2.0.0": "",
 86 |         "docutils==0.21.2": "",
 87 |         "dulwich==0.21.7": "",
 88 |         "einops==0.7.0": "",
 89 |         "entrypoints==0.4": "",
 90 |         "exceptiongroup==1.2.2": "",
 91 |         "executing==2.0.1": "",
 92 |         "fastjsonschema==2.19.1": "",
 93 |         "filelock==3.15.4": "",
 94 |         "fonttools==4.57.0": "",
 95 |         "fqdn==1.5.1": "",
 96 |         "frozenlist==1.4.1": "",
 97 |         "fsspec==2024.2.0": "",
 98 |         "ftfy==6.3.1": "",
 99 |         "gdown==5.1.0": "",
100 |         "gitdb==4.0.11": "",
101 |         "GitPython==3.1.41": "",
102 |         "h11==0.14.0": "",
103 |         "httpcore==1.0.2": "",
104 |         "httpx==0.26.0": "",
105 |         "huggingface-hub==0.24.5": "",
106 |         "idna==3.4": "",
107 |         "imageio==2.37.0": "",
108 |         "imageio-ffmpeg==0.6.0": "",
109 |         "importlib_metadata==8.2.0": "",
110 |         "installer==0.7.0": "",
111 |         "ipykernel==6.29.5": "",
112 |         "ipython==8.18.1": "",
113 |         "ipython-genutils==0.2.0": "",
114 |         "isodate==0.7.2": "",
115 |         "isoduration==20.11.0": "",
116 |         "jaraco.classes==3.4.0": "",
117 |         "jedi==0.19.1": "",
118 |         "jeepney==0.8.0": "",
119 |         "Jinja2==3.1.2": "",
120 |         "joblib==1.4.2": "",
121 |         "json5==0.9.14": "",
122 |         "jsonpointer==2.4": "",
123 |         "jsonschema==4.21.1": "",
124 |         "jsonschema-specifications==2023.12.1": "",
125 |         "jupyter-events==0.12.0": "",
126 |         "jupyter-lsp==2.2.5": "",
127 |         "jupyter_client==8.6.2": "",
128 |         "jupyter_core==5.7.2": "",
129 |         "jupyter_server==2.15.0": "",
130 |         "jupyter_server_terminals==0.5.3": "",
131 |         "jupyterlab==4.1.0": "",
132 |         "jupyterlab_pygments==0.3.0": "",
133 |         "jupyterlab_server==2.25.2": "",
134 |         "kanjize==1.6.0": "",
135 |         "keyring==24.3.1": "",
136 |         "kiwisolver==1.4.8": "",
137 |         "kornia==0.8.0": "",
138 |         "kornia_rs==0.1.8": "",
139 |         "language-tags==1.2.0": "",
140 |         "lazy_loader==0.4": "",
141 |         "librosa==0.11.0": "",
142 |         "lit==15.0.7": "",
143 |         "llvmlite==0.44.0": "",
144 |         "lxml==5.1.0": "",
145 |         "markdown-it-py==3.0.0": "",
146 |         "MarkupSafe==2.1.3": "",
147 |         "matplotlib==3.10.1": "",
148 |         "matplotlib-inline==0.1.7": "",
149 |         "matrix-client==0.4.0": "",
150 |         "mdurl==0.1.2": "",
151 |         "mistune==3.0.2": "",
152 |         "more-itertools==10.4.0": "",
153 |         "mpmath==1.3.0": "",
154 |         "msgpack==1.0.8": "",
155 |         "mss==10.0.0": "",
156 |         "multidict==6.0.5": "",
157 |         "nbclassic==1.0.0": "",
158 |         "nbclient==0.9.0": "",
159 |         "nbconvert==7.15.0": "",
160 |         "nbformat==5.9.2": "",
161 |         "nest-asyncio==1.6.0": "",
162 |         "networkx==3.2.1": "",
163 |         "notebook_shim==0.2.4": "",
164 |         "numba==0.61.2": "",
165 |         "numpy==1.26.3": "",
166 |         "nvidia-cublas-cu12==12.1.3.1": "",
167 |         "nvidia-cuda-cupti-cu12==12.1.105": "",
168 |         "nvidia-cuda-nvrtc-cu12==12.1.105": "",
169 |         "nvidia-cuda-runtime-cu12==12.1.105": "",
170 |         "nvidia-cudnn-cu12==9.1.0.70": "",
171 |         "nvidia-cufft-cu12==11.0.2.54": "",
172 |         "nvidia-curand-cu12==10.3.2.106": "",
173 |         "nvidia-cusolver-cu12==11.4.5.107": "",
174 |         "nvidia-cusparse-cu12==12.1.0.106": "",
175 |         "nvidia-nccl-cu12==2.20.5": "",
176 |         "nvidia-nvjitlink-cu12==12.8.93": "",
177 |         "nvidia-nvtx-cu12==12.1.105": "",
178 |         "omegaconf==2.3.0": "",
179 |         "opencv-python==4.11.0.86": "",
180 |         "overrides==7.7.0": "",
181 |         "packaging==25.0": "",
182 |         "pandocfilters==1.5.1": "",
183 |         "parso==0.8.4": "",
184 |         "pexpect==4.9.0": "",
185 |         "phonemizer==3.3.0": "",
186 |         "piexif==1.1.3": "",
187 |         "pillow==11.2.1": "",
188 |         "pkginfo==1.11.1": "",
189 |         "platformdirs==4.2.2": "",
190 |         "poetry-core==1.9.0": "",
191 |         "pooch==1.8.2": "",
192 |         "prometheus-client==0.19.0": "",
193 |         "prompt-toolkit==3.0.43": "",
194 |         "propcache==0.3.1": "",
195 |         "protobuf==5.27.3": "",
196 |         "psutil==5.9.8": "",
197 |         "ptyprocess==0.7.0": "",
198 |         "pure_eval==0.2.3": "",
199 |         "py-espeak-ng==0.1.8": "",
200 |         "pycparser==2.21": "",
201 |         "pycryptodome==3.20.0": "",
202 |         "PyGithub==2.6.1": "",
203 |         "Pygments==2.17.2": "",
204 |         "PyJWT==2.10.1": "",
205 |         "PyNaCl==1.5.0": "",
206 |         "pyparsing==3.2.3": "",
207 |         "pyproject_hooks==1.2.0": "",
208 |         "PySocks==1.7.1": "",
209 |         "python-dateutil==2.8.2": "",
210 |         "python-json-logger==3.3.0": "",
211 |         "PyYAML==6.0.1": "",
212 |         "pyzmq==26.2.0": "",
213 |         "RapidFuzz==3.13.0": "",
214 |         "rdflib==7.1.4": "",
215 |         "referencing==0.36.2": "",
216 |         "regex==2024.11.6": "",
217 |         "requests==2.32.3": "",
218 |         "rfc3339-validator==0.1.4": "",
219 |         "rfc3986==1.5.0": "",
220 |         "rfc3986-validator==0.1.1": "",
221 |         "rich==14.0.0": "",
222 |         "rpds-py==0.24.0": "",
223 |         "safetensors==0.5.3": "",
224 |         "sageattention==1.0.6": "",
225 |         "scikit-image==0.25.2": "",
226 |         "scikit-learn==1.6.1": "",
227 |         "scipy==1.12.0": "",
228 |         "SecretStorage==3.3.3": "",
229 |         "segment-anything==1.0": "",
230 |         "segments==2.3.0": "",
231 |         "Send2Trash==1.8.3": "",
232 |         "sentencepiece==0.2.0": "",
233 |         "shellingham==1.5.4": "",
234 |         "six==1.17.0": "",
235 |         "smmap==5.0.2": "",
236 |         "sniffio==1.3.1": "",
237 |         "soundfile==0.13.1": "",
238 |         "soupsieve==2.6": "",
239 |         "soxr==0.5.0.post1": "",
240 |         "spandrel==0.4.1": "",
241 |         "stack-data==0.6.3": "",
242 |         "SudachiDict-full==20250129": "",
243 |         "SudachiPy==0.6.10": "",
244 |         "sympy==1.13.3": "",
245 |         "terminado==0.18.1": "",
246 |         "threadpoolctl==3.6.0": "",
247 |         "tifffile==2025.3.30": "",
248 |         "tinycss2==1.4.0": "",
249 |         "tokenizers==0.21.1": "",
250 |         "toml==0.10.2": "",
251 |         "tomli==2.2.1": "",
252 |         "torch==2.4.0": "",
253 |         "torchaudio==2.4.0": "",
254 |         "torchsde==0.2.6": "",
255 |         "torchvision==0.19.0": "",
256 |         "tornado==6.4.1": "",
257 |         "tqdm==4.67.1": "",
258 |         "traitlets==5.14.3": "",
259 |         "trampoline==0.1.2": "",
260 |         "transformers==4.48.3": "",
261 |         "triton==3.0.0": "",
262 |         "typer==0.15.2": "",
263 |         "types-python-dateutil==2.9.0.20241206": "",
264 |         "typing_extensions==4.13.2": "",
265 |         "uri-template==1.3.0": "",
266 |         "uritemplate==4.1.1": "",
267 |         "urllib3==1.26.20": "",
268 |         "uv==0.6.14": "",
269 |         "wcwidth==0.2.13": "",
270 |         "webcolors==24.11.1": "",
271 |         "webencodings==0.5.1": "",
272 |         "websocket-client==1.8.0": "",
273 |         "# Editable Git install with no remote (workspace==0.1.0)": "",
274 |         "-e /home/jeremy.degail": "",
275 |         "wrapt==1.17.2": "",
276 |         "xformers==0.0.27.post2": "",
277 |         "yarl==1.20.0": "",
278 |         "zipp==3.21.0": ""
279 |     }
280 | }


--------------------------------------------------------------------------------