├── .github
├── ISSUE_TEMPLATE
│ └── bug_report.md
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ └── lint.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── Dockerfile
├── README.md
├── assets
├── img_lightx2v.jpg
└── inputs
│ └── imgs
│ ├── img_0.jpg
│ └── img_1.jpg
├── configs
├── advanced_ptq
│ └── wan_i2v.json
├── caching
│ ├── hunyuan_i2v_TaylorSeer.json
│ ├── hunyuan_i2v_Tea.json
│ ├── hunyuan_t2v_TaylorSeer.json
│ ├── hunyuan_t2v_Tea.json
│ ├── wan_i2v_Tea.json
│ └── wan_t2v_Tea.json
├── cogvideox_t2v.json
├── deploy
│ ├── hunyuan_i2v.json
│ ├── hunyuan_t2v.json
│ ├── wan_i2v.json
│ └── wan_t2v.json
├── dist
│ ├── hunyuan_t2v_dist_ring.json
│ └── hunyuan_t2v_dist_ulysses.json
├── hunyuan_i2v.json
├── hunyuan_i2v_save_quant.json
├── hunyuan_t2v.json
├── hunyuan_t2v_save_quant.json
├── offload
│ ├── wan_i2v_block.json
│ ├── wan_i2v_phase.json
│ ├── wan_t2v_block.json
│ └── wan_t2v_phase.json
├── wan_i2v.json
├── wan_i2v_causvid.json
├── wan_i2v_dist.json
├── wan_i2v_save_quant.json
├── wan_skyreels_v2_df.json
├── wan_skyreels_v2_i2v.json
├── wan_skyreels_v2_t2v.json
├── wan_t2v.json
├── wan_t2v_causvid.json
├── wan_t2v_causvid_save_quant.json
├── wan_t2v_dist.json
├── wan_t2v_save_quant.json
└── wan_t2v_sparge.json
├── docs
├── en_US
│ ├── 01.prepare_envs.md
│ ├── 02.start_server.md
│ └── 03.quantization.md
└── zh_CN
│ ├── 01.prepare_envs.md
│ ├── 02.start_server.md
│ └── 03.quantization.md
├── examples
├── diffusers
│ └── converter.py
└── vae_trt
│ ├── convert_trt.sh
│ └── convert_vae_trt_engine.py
├── lightx2v
├── __init__.py
├── api_multi_servers.py
├── api_server.py
├── attentions
│ ├── __init__.py
│ ├── common
│ │ ├── __init__.py
│ │ ├── flash_attn2.py
│ │ ├── flash_attn3.py
│ │ ├── sage_attn2.py
│ │ └── torch_sdpa.py
│ └── distributed
│ │ ├── __init__.py
│ │ ├── comm
│ │ ├── __init__.py
│ │ ├── all2all.py
│ │ └── ring_comm.py
│ │ ├── partial_heads_attn
│ │ ├── __init__.py
│ │ ├── attn.py
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── test.sh
│ │ │ └── test_acc.py
│ │ └── wrap.py
│ │ ├── ring
│ │ ├── __init__.py
│ │ ├── attn.py
│ │ ├── tests
│ │ │ ├── test.py
│ │ │ └── test.sh
│ │ └── wrap.py
│ │ ├── ulysses
│ │ ├── __init__.py
│ │ ├── attn.py
│ │ └── wrap.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── hunyuan
│ │ └── processor.py
│ │ ├── process.py
│ │ └── wan
│ │ └── processor.py
├── common
│ ├── __init__.py
│ ├── apis
│ │ ├── dit.py
│ │ ├── image_encoder.py
│ │ ├── prompt_enhancer.py
│ │ ├── text_encoder.py
│ │ └── vae.py
│ ├── backend_infer
│ │ └── trt
│ │ │ ├── common.py
│ │ │ └── common_runtime.py
│ ├── modules
│ │ ├── __init__.py
│ │ └── weight_module.py
│ ├── offload
│ │ └── manager.py
│ └── ops
│ │ ├── __init__.py
│ │ ├── attn
│ │ ├── __init__.py
│ │ └── attn_weight.py
│ │ ├── conv
│ │ ├── __init__.py
│ │ ├── conv2d.py
│ │ └── conv3d.py
│ │ ├── mm
│ │ ├── __init__.py
│ │ ├── mm_weight.py
│ │ └── mm_weight_calib.py
│ │ ├── norm
│ │ ├── __init__.py
│ │ ├── layer_norm_weight.py
│ │ └── rms_norm_weight.py
│ │ └── tensor
│ │ ├── __init__.py
│ │ └── tensor.py
├── infer.py
├── models
│ ├── __init__.py
│ ├── input_encoders
│ │ └── hf
│ │ │ ├── __init__.py
│ │ │ ├── clip
│ │ │ ├── __init__.py
│ │ │ └── model.py
│ │ │ ├── llama
│ │ │ ├── __init__.py
│ │ │ └── model.py
│ │ │ ├── llava
│ │ │ ├── __init__.py
│ │ │ └── model.py
│ │ │ ├── t5
│ │ │ ├── __init__.py
│ │ │ ├── model.py
│ │ │ └── tokenizer.py
│ │ │ ├── t5_v1_1_xxl
│ │ │ ├── __init__.py
│ │ │ └── model.py
│ │ │ └── xlm_roberta
│ │ │ ├── __init__.py
│ │ │ ├── model.py
│ │ │ └── xlm_roberta.py
│ ├── networks
│ │ ├── __init__.py
│ │ ├── cogvideox
│ │ │ ├── infer
│ │ │ │ ├── post_infer.py
│ │ │ │ ├── pre_infer.py
│ │ │ │ └── transformer_infer.py
│ │ │ ├── model.py
│ │ │ └── weights
│ │ │ │ ├── post_weights.py
│ │ │ │ ├── pre_weights.py
│ │ │ │ └── transformers_weights.py
│ │ ├── hunyuan
│ │ │ ├── __init__.py
│ │ │ ├── infer
│ │ │ │ ├── __init__.py
│ │ │ │ ├── feature_caching
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── transformer_infer.py
│ │ │ │ │ └── utils.py
│ │ │ │ ├── post_infer.py
│ │ │ │ ├── pre_infer.py
│ │ │ │ ├── transformer_infer.py
│ │ │ │ ├── utils.py
│ │ │ │ ├── utils_bf16.py
│ │ │ │ └── utils_fp32.py
│ │ │ ├── model.py
│ │ │ └── weights
│ │ │ │ ├── __init__.py
│ │ │ │ ├── post_weights.py
│ │ │ │ ├── pre_weights.py
│ │ │ │ └── transformer_weights.py
│ │ └── wan
│ │ │ ├── causvid_model.py
│ │ │ ├── infer
│ │ │ ├── causvid
│ │ │ │ ├── __init__.py
│ │ │ │ └── transformer_infer.py
│ │ │ ├── feature_caching
│ │ │ │ ├── __init__.py
│ │ │ │ └── transformer_infer.py
│ │ │ ├── post_infer.py
│ │ │ ├── pre_infer.py
│ │ │ ├── transformer_infer.py
│ │ │ └── utils.py
│ │ │ ├── lora_adapter.py
│ │ │ ├── model.py
│ │ │ └── weights
│ │ │ ├── post_weights.py
│ │ │ ├── pre_weights.py
│ │ │ └── transformer_weights.py
│ ├── runners
│ │ ├── __init__.py
│ │ ├── cogvideox
│ │ │ ├── __init__.py
│ │ │ └── cogvidex_runner.py
│ │ ├── default_runner.py
│ │ ├── graph_runner.py
│ │ ├── hunyuan
│ │ │ ├── __init__.py
│ │ │ └── hunyuan_runner.py
│ │ └── wan
│ │ │ ├── __init__.py
│ │ │ ├── wan_causvid_runner.py
│ │ │ ├── wan_runner.py
│ │ │ └── wan_skyreels_v2_df_runner.py
│ ├── schedulers
│ │ ├── __init__.py
│ │ ├── cogvideox
│ │ │ └── scheduler.py
│ │ ├── hunyuan
│ │ │ ├── feature_caching
│ │ │ │ ├── scheduler.py
│ │ │ │ └── utils.py
│ │ │ └── scheduler.py
│ │ ├── scheduler.py
│ │ └── wan
│ │ │ ├── causvid
│ │ │ └── scheduler.py
│ │ │ ├── df
│ │ │ └── skyreels_v2_df_scheduler.py
│ │ │ ├── feature_caching
│ │ │ └── scheduler.py
│ │ │ └── scheduler.py
│ └── video_encoders
│ │ ├── hf
│ │ ├── __init__.py
│ │ ├── autoencoder_kl_causal_3d
│ │ │ ├── __init__.py
│ │ │ ├── autoencoder_kl_causal_3d.py
│ │ │ ├── model.py
│ │ │ ├── unet_causal_3d_blocks.py
│ │ │ └── vae.py
│ │ ├── cogvideox
│ │ │ ├── __init__.py
│ │ │ ├── autoencoder_ks_cogvidex.py
│ │ │ └── model.py
│ │ ├── tae.py
│ │ └── wan
│ │ │ ├── __init__.py
│ │ │ ├── vae.py
│ │ │ └── vae_tiny.py
│ │ └── trt
│ │ ├── __init__.py
│ │ └── autoencoder_kl_causal_3d
│ │ ├── model.py
│ │ └── trt_vae_infer.py
└── utils
│ ├── __init__.py
│ ├── envs.py
│ ├── generate_task_id.py
│ ├── memory_profiler.py
│ ├── profiler.py
│ ├── prompt_enhancer.py
│ ├── quant_utils.py
│ ├── registry_factory.py
│ ├── service_utils.py
│ ├── set_config.py
│ └── utils.py
├── pyproject.toml
├── requirements.txt
├── save_results
└── .gitkeep
└── scripts
├── check_status.py
├── deploy
├── start_dit_server.sh
├── start_image_encoder_server.sh
├── start_prompt_enhancer.sh
├── start_server.sh
├── start_text_encoder_server.sh
└── start_vae_server.sh
├── post.py
├── post_enhancer.py
├── post_i2v.py
├── post_multi_servers.py
├── run_cogvideox_t2v.sh
├── run_hunyuan_i2v.sh
├── run_hunyuan_i2v_save_quant.sh
├── run_hunyuan_i2v_taylorseer.sh
├── run_hunyuan_i2v_tea.sh
├── run_hunyuan_t2v.sh
├── run_hunyuan_t2v_dist.sh
├── run_hunyuan_t2v_save_quant.sh
├── run_hunyuan_t2v_taylorseer.sh
├── run_hunyuan_t2v_tea.sh
├── run_wan_i2v.sh
├── run_wan_i2v_advanced_ptq.sh
├── run_wan_i2v_causvid.sh
├── run_wan_i2v_dist.sh
├── run_wan_i2v_save_quant.sh
├── run_wan_i2v_tea.sh
├── run_wan_i2v_with_lora.sh
├── run_wan_skyreels_v2_df.sh
├── run_wan_skyreels_v2_i2v.sh
├── run_wan_skyreels_v2_t2v.sh
├── run_wan_t2v.sh
├── run_wan_t2v_causvid.sh
├── run_wan_t2v_causvid_save_quant.sh
├── run_wan_t2v_dist.sh
├── run_wan_t2v_enhancer.sh
├── run_wan_t2v_save_quant.sh
├── run_wan_t2v_sparge.sh
├── run_wan_t2v_tea.sh
├── start_multi_servers.sh
├── start_server.sh
├── start_server_enhancer.sh
└── stop_running_task.py
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug Report
3 | about: Use this template to report bugs in the project.
4 | title: "[Bug] "
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | ### Description
11 | Briefly describe the bug you encountered.
12 |
13 | ### Steps to Reproduce
14 | 1. First step of the operation.
15 | 2. Second step of the operation.
16 | 3. ...
17 |
18 | ### Expected Result
19 | Describe the normal behavior you expected.
20 |
21 | ### Actual Result
22 | Describe the abnormal situation that actually occurred.
23 |
24 | ### Environment Information
25 | - Operating System: [e.g., Ubuntu 22.04]
26 | - Commit ID: [Version of the project]
27 |
28 | ### Log Information
29 | Please provide relevant error logs or debugging information.
30 |
31 | ### Additional Information
32 | If there is any other information that can help solve the problem, please add it here.
33 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## Contributing Guidelines
2 |
3 | We have prepared a `pre-commit` hook to enforce consistent code formatting across the project. If your code complies with the standards, you should not see any errors. You can clean up your code by following the steps below:
4 |
5 | 1. Install the required dependencies:
6 |
7 | ```shell
8 | pip install ruff pre-commit
9 | ```
10 |
11 | 2. Then, run the following command before commit:
12 |
13 | ```shell
14 | pre-commit run --all-files
15 | ```
16 |
17 | 3. Finally, please double-check your code to ensure it complies with the following additional specifications as much as possible:
18 | - Avoid hard-coding local paths: Make sure your submissions do not include hard-coded local paths, as these paths are specific to individual development environments and can cause compatibility issues. Use relative paths or configuration files instead.
19 | - Clear error handling: Implement clear error-handling mechanisms in your code so that error messages can accurately indicate the location of the problem, possible causes, and suggested solutions, facilitating quick debugging.
20 | - Detailed comments and documentation: Add comments to complex code sections and provide comprehensive documentation to explain the functionality of the code, input-output requirements, and potential error scenarios.
21 |
22 | Thank you for your contributions!
23 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: lint
2 |
3 | on:
4 | pull_request:
5 | push:
6 |
7 | concurrency:
8 | group: ${{ github.workflow }}-${{ github.ref }}
9 | cancel-in-progress: true
10 |
11 | jobs:
12 | lint:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - name: Checkout code
16 | uses: actions/checkout@v4
17 |
18 | - name: Set up Python 3.11
19 | uses: actions/setup-python@v4
20 | with:
21 | python-version: '3.11'
22 |
23 | - name: Cache Python dependencies
24 | uses: actions/cache@v3
25 | with:
26 | path: ~/.cache/pip
27 | key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
28 | restore-keys: |
29 | ${{ runner.os }}-pip-
30 |
31 | - name: Install pre-commit hook
32 | run: |
33 | pip install pre-commit ruff
34 |
35 | - name: Check pre-commit config file
36 | run: |
37 | if [ ! -f ".pre-commit-config.yaml" ]; then
38 | echo "Error: .pre-commit-config.yaml not found."
39 | exit 1
40 | fi
41 |
42 | - name: Linting
43 | run: |
44 | echo "Running pre-commit on all files..."
45 | pre-commit run --all-files || {
46 | echo "Linting failed. Please check the above output for details."
47 | exit 1
48 | }
49 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pth
2 | *.pt
3 | *.onnx
4 | *.pk
5 | *.model
6 | *.zip
7 | *.tar
8 | *.pyc
9 | *.log
10 | *.o
11 | *.so
12 | *.a
13 | *.exe
14 | *.out
15 | .idea
16 | **.DS_Store**
17 | **/__pycache__/**
18 | **.swp
19 | .vscode/
20 | .env
21 | .log
22 | *.pid
23 | *.ipynb*
24 | *.mp4
25 |
26 | # just4dev
27 | devscripts/
28 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "3rd/flash-attention"]
2 | path = 3rd/flash-attention
3 | url = https://github.com/Dao-AILab/flash-attention.git
4 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # Follow https://verdantfox.com/blog/how-to-use-git-pre-commit-hooks-the-hard-way-and-the-easy-way
2 | repos:
3 | - repo: https://github.com/astral-sh/ruff-pre-commit
4 | rev: v0.11.0
5 | hooks:
6 | - id: ruff
7 | args: [--fix, --respect-gitignore, --config=pyproject.toml]
8 | - id: ruff-format
9 | args: [--config=pyproject.toml]
10 |
11 | - repo: https://github.com/pre-commit/pre-commit-hooks
12 | rev: v4.5.0
13 | hooks:
14 | - id: trailing-whitespace
15 | - id: end-of-file-fixer
16 | - id: check-yaml
17 | - id: check-toml
18 | - id: check-added-large-files
19 | - id: check-case-conflict
20 | - id: check-merge-conflict
21 | - id: debug-statements
22 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04 AS base
2 |
3 | WORKDIR /workspace
4 |
5 | COPY . /workspace/lightx2v/
6 |
7 | ENV DEBIAN_FRONTEND=noninteractive
8 | ENV LANG=C.UTF-8
9 | ENV LC_ALL=C.UTF-8
10 |
11 | # use tsinghua source
12 | RUN sed -i 's|http://archive.ubuntu.com/ubuntu/|https://mirrors.tuna.tsinghua.edu.cn/ubuntu/|g' /etc/apt/sources.list \
13 | && sed -i 's|http://security.ubuntu.com/ubuntu/|https://mirrors.tuna.tsinghua.edu.cn/ubuntu/|g' /etc/apt/sources.list
14 |
15 | RUN apt-get update && apt install -y software-properties-common \
16 | && add-apt-repository ppa:deadsnakes/ppa \
17 | && apt-get update \
18 | && apt-get install -y vim tmux zip unzip wget git cmake build-essential \
19 | curl libibverbs-dev ca-certificates iproute2 \
20 | ffmpeg libsm6 libxext6 \
21 | && apt-get install -y python3.11 python3.11-venv python3.11-dev python3-pip \
22 | && apt-get clean && rm -rf /var/lib/apt/lists/*
23 |
24 | RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \
25 | && update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1
26 |
27 | RUN pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
28 | && pip install -r /workspace/lightx2v/requirements.txt
29 |
30 | # Install again separately to bypass the version conflict check
31 | RUN pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
32 | && pip install transformers==4.45.2
33 |
34 | # install flash-attention 2
35 | RUN cd lightx2v/3rd/flash-attention && pip install --no-cache-dir -v -e .
36 |
37 | # install flash-attention 3 (only needed on Hopper GPUs)
38 | RUN cd lightx2v/3rd/flash-attention/hopper && pip install --no-cache-dir -v -e .
39 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LightX2V: Light Video Generation Inference Framework
2 |
3 |
4 |

5 |
6 | [Apache-2.0 License](https://opensource.org/licenses/Apache-2.0)
7 | [DeepWiki](https://deepwiki.com/ModelTC/lightx2v)
8 | [Docs (en_US)](https://github.com/ModelTC/lightx2v/tree/main/docs/en_US)
9 | [Docs (zh_CN)](https://github.com/ModelTC/lightx2v/tree/main/docs/zh_CN)
10 | [Docker](https://hub.docker.com/r/lightx2v/lightx2v/tags)
11 |
12 |
13 |
14 | --------------------------------------------------------------------------------
15 |
16 | ## Supported Model List
17 |
18 | ✅ [HunyuanVideo-T2V](https://huggingface.co/tencent/HunyuanVideo)
19 |
20 | ✅ [HunyuanVideo-I2V](https://huggingface.co/tencent/HunyuanVideo-I2V)
21 |
22 | ✅ [Wan2.1-T2V](https://huggingface.co/Wan-AI/Wan2.1-T2V-1.3B)
23 |
24 | ✅ [Wan2.1-I2V](https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-480P)
25 |
26 | ✅ [Wan2.1-T2V-CausVid](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-CausVid)
27 |
28 | ✅ [SkyReels-V2-DF](https://huggingface.co/Skywork/SkyReels-V2-DF-14B-540P)
29 |
30 | ✅ [CogVideoX1.5-5B-T2V](https://huggingface.co/THUDM/CogVideoX1.5-5B)
31 |
32 | ## How to Run
33 |
34 | Please refer to the [documentation](https://github.com/ModelTC/lightx2v/tree/main/docs) in lightx2v.
35 |
36 | ## Contributing Guidelines
37 |
38 | We have prepared a `pre-commit` hook to enforce consistent code formatting across the project.
39 |
40 | 1. Install the required dependencies:
41 |
42 | ```shell
43 | pip install ruff pre-commit
44 | ```
45 |
46 | 2. Then, run the following command before commit:
47 |
48 | ```shell
49 | pre-commit run --all-files
50 | ```
51 |
52 | Thank you for your contributions!
53 |
54 |
55 | ## Acknowledgments
56 |
57 | The code in this repository was built with reference to the repositories of all the models listed above.
58 |
--------------------------------------------------------------------------------
/assets/img_lightx2v.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/assets/img_lightx2v.jpg
--------------------------------------------------------------------------------
/assets/inputs/imgs/img_0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/assets/inputs/imgs/img_0.jpg
--------------------------------------------------------------------------------
/assets/inputs/imgs/img_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/assets/inputs/imgs/img_1.jpg
--------------------------------------------------------------------------------
/configs/advanced_ptq/wan_i2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 50,
3 | "target_video_length": 81,
4 | "text_len": 512,
5 | "target_height": 480,
6 | "target_width": 832,
7 | "attention_type": "flash_attn3",
8 | "seed": 42,
9 | "sample_guide_scale": 5,
10 | "sample_shift": 5,
11 | "enable_cfg": true,
12 | "cpu_offload": false,
13 | "mm_config": {
14 | "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
15 | "quant_method": "smoothquant"
16 | },
17 | "quant_model_path": "/path/to/int8_model"
18 | }
19 |
--------------------------------------------------------------------------------
/configs/caching/hunyuan_i2v_TaylorSeer.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "i2v_resolution": "720p",
5 | "attention_type": "flash_attn3",
6 | "seed": 0,
7 | "mm_config": {
8 | "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
9 | "weight_auto_quant": true
10 | },
11 | "feature_caching": "TaylorSeer"
12 | }
13 |
--------------------------------------------------------------------------------
/configs/caching/hunyuan_i2v_Tea.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "i2v_resolution": "720p",
5 | "attention_type": "flash_attn3",
6 | "seed": 0,
7 | "mm_config": {
8 | "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
9 | "weight_auto_quant": true
10 | },
11 | "feature_caching": "Tea"
12 | }
13 |
--------------------------------------------------------------------------------
/configs/caching/hunyuan_t2v_TaylorSeer.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "target_height": 720,
5 | "target_width": 1280,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "feature_caching": "TaylorSeer"
9 | }
10 |
--------------------------------------------------------------------------------
/configs/caching/hunyuan_t2v_Tea.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "target_height": 720,
5 | "target_width": 1280,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "mm_config": {
9 | "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
10 | "weight_auto_quant": true
11 | },
12 | "feature_caching": "Tea"
13 | }
14 |
--------------------------------------------------------------------------------
/configs/caching/wan_i2v_Tea.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 40,
3 | "target_video_length": 81,
4 | "target_height": 480,
5 | "target_width": 832,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "sample_guide_scale": 5,
9 | "sample_shift": 5,
10 | "enable_cfg": true,
11 | "cpu_offload": false,
12 | "mm_config": {
13 | "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
14 | "weight_auto_quant": true
15 | },
16 | "feature_caching": "Tea"
17 | }
18 |
--------------------------------------------------------------------------------
/configs/caching/wan_t2v_Tea.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 50,
3 | "target_video_length": 81,
4 | "text_len": 512,
5 | "target_height": 480,
6 | "target_width": 832,
7 | "attention_type": "flash_attn3",
8 | "seed": 42,
9 | "sample_guide_scale": 6,
10 | "sample_shift": 8,
11 | "enable_cfg": true,
12 | "cpu_offload": false,
13 | "mm_config": {
14 | "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
15 | "weight_auto_quant": true
16 | },
17 | "feature_caching": "Tea"
18 | }
19 |
--------------------------------------------------------------------------------
/configs/cogvideox_t2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "seed": 42,
3 | "text_len": 226,
4 | "num_videos_per_prompt": 1,
5 | "target_video_length": 81,
6 | "num_inference_steps": 50,
7 | "num_train_timesteps": 1000,
8 | "timestep_spacing": "trailing",
9 | "steps_offset": 0,
10 | "latent_channels": 16,
11 | "height": 768,
12 | "width": 1360,
13 | "vae_scale_factor_temporal": 4,
14 | "vae_scale_factor_spatial": 8,
15 | "vae_scaling_factor_image": 0.7,
16 | "batch_size": 1,
17 | "patch_size": 2,
18 | "patch_size_t": 2,
19 | "guidance_scale": 0,
20 | "use_rotary_positional_embeddings": true,
21 | "do_classifier_free_guidance": false,
22 | "transformer_sample_width": 170,
23 | "transformer_sample_height": 96,
24 | "transformer_sample_frames": 81,
25 | "transformer_attention_head_dim": 64,
26 | "transformer_num_attention_heads": 48,
27 | "transformer_temporal_compression_ratio": 4,
28 | "transformer_temporal_interpolation_scale": 1.0,
29 | "transformer_use_learned_positional_embeddings": false,
30 | "transformer_spatial_interpolation_scale": 1.875,
31 | "transformer_num_layers": 42,
32 | "beta_schedule": "scaled_linear",
33 | "scheduler_beta_start": 0.00085,
34 | "scheduler_beta_end": 0.012,
35 | "scheduler_set_alpha_to_one": true,
36 | "scheduler_snr_shift_scale": 1.0,
37 | "scheduler_rescale_betas_zero_snr": true,
38 | "scheduler_prediction_type": "v_prediction",
39 | "use_dynamic_cfg": true
40 | }
41 |
--------------------------------------------------------------------------------
/configs/deploy/hunyuan_i2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "i2v_resolution": "720p",
5 | "attention_type": "flash_attn3",
6 | "seed": 0,
7 | "sub_servers": {
8 | "dit": ["http://localhost:9000"],
9 | "prompt_enhancer": ["http://localhost:9001"],
10 | "image_encoder": ["http://localhost:9003"],
11 | "text_encoders": ["http://localhost:9002"],
12 | "vae_model": ["http://localhost:9004"]
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/configs/deploy/hunyuan_t2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "target_height": 720,
5 | "target_width": 1280,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "sub_servers": {
9 | "dit": ["http://localhost:9000"],
10 | "prompt_enhancer": ["http://localhost:9001"],
11 | "image_encoder": ["http://localhost:9003"],
12 | "text_encoders": ["http://localhost:9002"],
13 | "vae_model": ["http://localhost:9004"]
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/configs/deploy/wan_i2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 40,
3 | "target_video_length": 81,
4 | "target_height": 480,
5 | "target_width": 832,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "sample_guide_scale": 5,
9 | "sample_shift": 5,
10 | "enable_cfg": true,
11 | "cpu_offload": false,
12 | "sub_servers": {
13 | "dit": ["http://localhost:9000"],
14 | "prompt_enhancer": ["http://localhost:9001"],
15 | "text_encoders": ["http://localhost:9002"],
16 | "image_encoder": ["http://localhost:9003"],
17 | "vae_model": ["http://localhost:9004"]
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/configs/deploy/wan_t2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 50,
3 | "target_video_length": 81,
4 | "text_len": 512,
5 | "target_height": 480,
6 | "target_width": 832,
7 | "attention_type": "flash_attn3",
8 | "seed": 42,
9 | "sample_guide_scale": 6,
10 | "sample_shift": 8,
11 | "enable_cfg": true,
12 | "cpu_offload": false,
13 | "sub_servers": {
14 | "dit": ["http://localhost:9000"],
15 | "prompt_enhancer": ["http://localhost:9001"],
16 | "text_encoders": ["http://localhost:9002"],
17 | "image_encoder": ["http://localhost:9003"],
18 | "vae_model": ["http://localhost:9004"]
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/configs/dist/hunyuan_t2v_dist_ring.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "target_height": 720,
5 | "target_width": 1280,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "parallel_attn_type": "ring"
9 | }
10 |
--------------------------------------------------------------------------------
/configs/dist/hunyuan_t2v_dist_ulysses.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "target_height": 720,
5 | "target_width": 1280,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "parallel_attn_type": "ulysses"
9 | }
10 |
--------------------------------------------------------------------------------
/configs/hunyuan_i2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "i2v_resolution": "720p",
5 | "attention_type": "flash_attn3",
6 | "seed": 0
7 | }
8 |
--------------------------------------------------------------------------------
/configs/hunyuan_i2v_save_quant.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "i2v_resolution": "720p",
5 | "attention_type": "flash_attn3",
6 | "seed": 0,
7 | "mm_config": {
8 | "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
9 | },
10 | "quant_model_path": "./hy_i2v_quant_model"
11 | }
12 |
--------------------------------------------------------------------------------
/configs/hunyuan_t2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "target_height": 720,
5 | "target_width": 1280,
6 | "attention_type": "flash_attn3",
7 | "seed": 42
8 | }
9 |
--------------------------------------------------------------------------------
/configs/hunyuan_t2v_save_quant.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 33,
4 | "target_height": 720,
5 | "target_width": 1280,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "mm_config": {
9 | "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
10 | },
11 | "quant_model_path": "./hy_t2v_quant_model"
12 | }
13 |
--------------------------------------------------------------------------------
/configs/offload/wan_i2v_block.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 40,
3 | "target_video_length": 81,
4 | "target_height": 480,
5 | "target_width": 832,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "sample_guide_scale": 5,
9 | "sample_shift": 5,
10 | "enable_cfg": true,
11 | "cpu_offload": true,
12 | "offload_granularity": "block",
13 | "mm_config": {
14 | "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F",
15 | "weight_auto_quant": true
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/configs/offload/wan_i2v_phase.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 40,
3 | "target_video_length": 81,
4 | "target_height": 480,
5 | "target_width": 832,
6 | "attention_type": "sage_attn2",
7 | "seed": 42,
8 | "sample_guide_scale": 5,
9 | "sample_shift": 5,
10 | "enable_cfg": true,
11 | "cpu_offload": true,
12 | "offload_granularity": "phase",
13 | "mm_config": {
14 | "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F",
15 | "weight_auto_quant": true
16 | },
17 | "use_tiling_vae": true,
18 | "tiny_vae": true,
19 | "tiny_vae_path": "/mnt/afs_2/gushiqiao/x2v_models/taew2_1.pth",
20 | "text_encoder_offload_granularity": "block"
21 | }
22 |
--------------------------------------------------------------------------------
/configs/offload/wan_t2v_block.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 50,
3 | "target_video_length": 81,
4 | "text_len": 512,
5 | "target_height": 480,
6 | "target_width": 832,
7 | "attention_type": "sage_attn2",
8 | "seed": 42,
9 | "sample_guide_scale": 6,
10 | "sample_shift": 8,
11 | "enable_cfg": true,
12 | "cpu_offload": true,
13 | "offload_granularity": "block",
14 | "mm_config": {
15 | "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F",
16 | "weight_auto_quant": true
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/configs/offload/wan_t2v_phase.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 50,
3 | "target_video_length": 81,
4 | "text_len": 512,
5 | "target_height": 480,
6 | "target_width": 832,
7 | "attention_type": "sage_attn2",
8 | "seed": 42,
9 | "sample_guide_scale": 6,
10 | "sample_shift": 8,
11 | "enable_cfg": true,
12 | "cpu_offload": true,
13 | "offload_granularity": "phase",
14 | "mm_config": {
15 | "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F",
16 | "weight_auto_quant": true
17 | },
18 | "tiny_vae": true,
19 | "tiny_vae_path": "/mnt/afs_2/gushiqiao/x2v_models/taew2_1.pth",
20 | "text_encoder_offload_granularity": "block"
21 | }
22 |
--------------------------------------------------------------------------------
/configs/wan_i2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 40,
3 | "target_video_length": 81,
4 | "target_height": 480,
5 | "target_width": 832,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "sample_guide_scale": 5,
9 | "sample_shift": 5,
10 | "enable_cfg": true,
11 | "cpu_offload": false
12 | }
13 |
--------------------------------------------------------------------------------
/configs/wan_i2v_causvid.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 81,
4 | "target_height": 480,
5 | "target_width": 832,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "sample_guide_scale": 6,
9 | "sample_shift": 8,
10 | "enable_cfg": false,
11 | "cpu_offload": false,
12 | "num_fragments": 3,
13 | "num_frames": 21,
14 | "num_frame_per_block": 7,
15 | "num_blocks": 3,
16 | "frame_seq_length": 1560,
17 | "denoising_step_list": [999, 934, 862, 756, 603, 410, 250, 140, 74]
18 | }
19 |
--------------------------------------------------------------------------------
/configs/wan_i2v_dist.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 40,
3 | "target_video_length": 81,
4 | "target_height": 480,
5 | "target_width": 832,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "sample_guide_scale": 5,
9 | "sample_shift": 5,
10 | "enable_cfg": true,
11 | "cpu_offload": false,
12 | "parallel_attn_type": "ulysses",
13 | "parallel_vae": true
14 | }
15 |
--------------------------------------------------------------------------------
/configs/wan_i2v_save_quant.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 40,
3 | "target_video_length": 81,
4 | "target_height": 480,
5 | "target_width": 832,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "sample_guide_scale": 5,
9 | "sample_shift": 5,
10 | "enable_cfg": true,
11 | "cpu_offload": false,
12 | "mm_config": {
13 | "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl"
14 | },
15 | "quant_model_path": "./wan_i2v_quant_model"
16 | }
17 |
--------------------------------------------------------------------------------
/configs/wan_skyreels_v2_df.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 20,
3 | "target_video_length": 97,
4 | "text_len": 512,
5 | "target_height": 544,
6 | "target_width": 960,
7 | "num_frames": 257,
8 | "base_num_frames": 97,
9 | "overlap_history": 17,
10 | "addnoise_condition": 0,
11 | "causal_block_size": 1,
12 | "attention_type": "flash_attn3",
13 | "seed": 42,
14 | "sample_guide_scale": 5,
15 | "sample_shift": 3
16 | }
17 |
--------------------------------------------------------------------------------
/configs/wan_skyreels_v2_i2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 30,
3 | "target_video_length": 97,
4 | "text_len": 512,
5 | "target_height": 544,
6 | "target_width": 960,
7 | "attention_type": "flash_attn3",
8 | "seed": 42,
9 | "sample_guide_scale": 5,
10 | "sample_shift": 3
11 | }
12 |
--------------------------------------------------------------------------------
/configs/wan_skyreels_v2_t2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 30,
3 | "target_video_length": 97,
4 | "text_len": 512,
5 | "target_height": 544,
6 | "target_width": 960,
7 | "attention_type": "flash_attn3",
8 | "seed": 42,
9 | "sample_guide_scale": 6,
10 | "sample_shift": 8
11 | }
12 |
--------------------------------------------------------------------------------
/configs/wan_t2v.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 50,
3 | "target_video_length": 81,
4 | "text_len": 512,
5 | "target_height": 480,
6 | "target_width": 832,
7 | "attention_type": "flash_attn3",
8 | "seed": 42,
9 | "sample_guide_scale": 6,
10 | "sample_shift": 8,
11 | "enable_cfg": true,
12 | "cpu_offload": false
13 | }
14 |
--------------------------------------------------------------------------------
/configs/wan_t2v_causvid.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 9,
3 | "target_video_length": 81,
4 | "target_height": 480,
5 | "target_width": 832,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "sample_guide_scale": 6,
9 | "sample_shift": 8,
10 | "enable_cfg": false,
11 | "cpu_offload": false,
12 | "num_fragments": 3,
13 | "num_frames": 21,
14 | "num_frame_per_block": 3,
15 | "num_blocks": 7,
16 | "frame_seq_length": 1560,
17 | "denoising_step_list": [999, 934, 862, 756, 603, 410, 250, 140, 74]
18 | }
19 |
--------------------------------------------------------------------------------
/configs/wan_t2v_causvid_save_quant.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 9,
3 | "target_video_length": 81,
4 | "target_height": 480,
5 | "target_width": 832,
6 | "attention_type": "flash_attn3",
7 | "seed": 42,
8 | "sample_guide_scale": 6,
9 | "sample_shift": 8,
10 | "enable_cfg": false,
11 | "cpu_offload": false,
12 | "mm_config": {
13 | "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl"
14 | },
15 | "num_fragments": 3,
16 | "num_frames": 21,
17 | "num_frame_per_block": 3,
18 | "num_blocks": 7,
19 | "frame_seq_length": 1560,
20 | "denoising_step_list": [999, 934, 862, 756, 603, 410, 250, 140, 74]
21 | }
22 |
--------------------------------------------------------------------------------
/configs/wan_t2v_dist.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 50,
3 | "target_video_length": 81,
4 | "text_len": 512,
5 | "target_height": 480,
6 | "target_width": 832,
7 | "attention_type": "flash_attn3",
8 | "seed": 42,
9 | "sample_guide_scale": 6,
10 | "sample_shift": 8,
11 | "enable_cfg": true,
12 | "cpu_offload": false,
13 | "parallel_attn_type": "ulysses",
14 | "parallel_vae": true
15 | }
16 |
--------------------------------------------------------------------------------
/configs/wan_t2v_save_quant.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 50,
3 | "target_video_length": 81,
4 | "text_len": 512,
5 | "target_height": 480,
6 | "target_width": 832,
7 | "attention_type": "flash_attn3",
8 | "seed": 42,
9 | "sample_guide_scale": 6,
10 | "sample_shift": 8,
11 | "enable_cfg": true,
12 | "cpu_offload": false,
13 | "mm_config": {
14 | "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl"
15 | },
16 | "quant_model_path": "./wan_t2v_quant_model"
17 | }
18 |
--------------------------------------------------------------------------------
/configs/wan_t2v_sparge.json:
--------------------------------------------------------------------------------
1 | {
2 | "infer_steps": 50,
3 | "target_video_length": 81,
4 | "text_len": 512,
5 | "target_height": 480,
6 | "target_width": 832,
7 | "attention_type": "flash_attn3",
8 | "seed": 42,
9 | "sample_guide_scale": 6,
10 | "sample_shift": 8,
11 | "enable_cfg": true,
12 | "cpu_offload": false,
13 | "sparge": true,
14 | "sparge_ckpt": "configs/shared_weights/sparge_wan2.1_t2v_1.3B.pt"
15 | }
16 |
--------------------------------------------------------------------------------
/docs/en_US/01.prepare_envs.md:
--------------------------------------------------------------------------------
1 | # Prepare Environment
2 |
3 | We recommend using a Docker environment. Here is the [dockerhub](https://hub.docker.com/r/lightx2v/lightx2v/tags) page for lightx2v; please select the tag with the latest date, for example, 25042502.
4 |
5 | ```shell
6 | docker pull lightx2v/lightx2v:25042502
7 | docker run --gpus all -itd --ipc=host --name [container_name] -v [mount_settings] --entrypoint /bin/bash [image_id]
8 | ```
9 |
10 | If you want to set up the environment yourself using conda, you can refer to the following steps:
11 |
12 | ```shell
13 | # clone repo and submodules
14 | git clone https://github.com/ModelTC/lightx2v.git lightx2v && cd lightx2v
15 | git submodule update --init --recursive
16 |
17 | conda create -n lightx2v python=3.11 && conda activate lightx2v
18 | pip install -r requirements.txt
19 |
20 | # Install again separately to bypass the version conflict check
21 | # The Hunyuan model needs to run under this version of transformers. If you do not need to run the Hunyuan model, you can ignore this step.
22 | pip install transformers==4.45.2
23 |
24 | # install flash-attention 2
25 | cd lightx2v/3rd/flash-attention && pip install --no-cache-dir -v -e .
26 |
27 | # install flash-attention 3 (only needed on Hopper GPUs)
28 | cd lightx2v/3rd/flash-attention/hopper && pip install --no-cache-dir -v -e .
29 | ```
30 |
31 | # Inference
32 |
33 | ```shell
34 | # Modify the path in the script
35 | bash scripts/run_wan_t2v.sh
36 | ```
37 |
38 | In addition to the existing input arguments in the script, there are also some necessary parameters in the `${lightx2v_path}/configs/wan_t2v.json` file specified by `--config_json`. You can modify them as needed.
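
For example, here is a minimal sketch of deriving a custom config to point `--config_json` at. The keys are taken from `configs/wan_t2v.json` in this repo; the output file name is hypothetical.

```python
import json
from pathlib import Path

# Load the default Wan T2V config shipped with the repo.
cfg = json.loads(Path("configs/wan_t2v.json").read_text())

# Override a few of the documented keys as needed.
cfg["infer_steps"] = 30
cfg["target_height"] = 480
cfg["target_width"] = 832
cfg["seed"] = 123

# Write a custom copy and point --config_json in the run script at it.
Path("configs/wan_t2v_custom.json").write_text(json.dumps(cfg, indent=4))
```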
39 |
--------------------------------------------------------------------------------
/docs/en_US/03.quantization.md:
--------------------------------------------------------------------------------
1 | # Quantization
2 |
3 | lightx2v supports quantized inference for linear layers, with w8a8-int8 and w8a8-fp8 matrix multiplication.
4 |
5 |
6 | ### Run Quantized Inference
7 |
8 | ```shell
9 | # Modify the path in the script
10 | bash scripts/run_wan_t2v_save_quant.sh
11 | ```
12 |
13 | There are two execution commands in the script:
14 |
15 | #### Save Quantization Weights
16 |
17 | Set the `RUNNING_FLAG` environment variable to `save_naive_quant`, and set `--config_json` to the corresponding `json` file: `${lightx2v_path}/configs/wan_t2v_save_quant.json`. In this file, `quant_model_path` specifies the path to save the quantized model.
18 |
19 | #### Load Quantization Weights and Inference
20 |
21 | Set the `RUNNING_FLAG` environment variable to `infer`, and set `--config_json` to the `json` file from the previous step.
22 |
23 | ### Start Quantization Service
24 |
25 | After saving the quantized weights, as in the previous loading step, set the `RUNNING_FLAG` environment variable to `infer`, and set `--config_json` to the `json` file from the first step.
26 |
27 | For example, modify the `scripts/start_server.sh` script as follows:
28 |
29 | ```shell
30 | export RUNNING_FLAG=infer
31 |
32 | python -m lightx2v.api_server \
33 | --model_cls wan2.1 \
34 | --task t2v \
35 | --model_path $model_path \
36 | --config_json ${lightx2v_path}/configs/wan_t2v_save_quant.json \
37 | --port 8000
38 | ```
39 |
--------------------------------------------------------------------------------
/docs/zh_CN/01.prepare_envs.md:
--------------------------------------------------------------------------------
1 | # Prepare Environment
2 |
3 | We recommend using a Docker environment. Here is the [dockerhub](https://hub.docker.com/r/lightx2v/lightx2v/tags) page for lightx2v; please select the tag with the latest date, for example, 25042502.
4 |
5 | ```shell
6 | docker pull lightx2v/lightx2v:25042502
7 | docker run --gpus all -itd --ipc=host --name [container_name] -v [mount_settings] --entrypoint /bin/bash [image_id]
8 | ```
9 |
10 | If you want to set up the environment yourself with conda, you can follow these steps:
11 |
12 | ```shell
13 | # clone the repo and submodules from GitHub
14 | git clone https://github.com/ModelTC/lightx2v.git lightx2v && cd lightx2v
15 | git submodule update --init --recursive
16 |
17 | conda create -n lightx2v python=3.11 && conda activate lightx2v
18 | pip install -r requirements.txt
19 |
20 | # Reinstall transformers separately to bypass pip's version conflict check
21 | # The Hunyuan model needs to run under this version of transformers; if you do not need to run the Hunyuan model, you can skip this step.
22 | pip install transformers==4.45.2
23 |
24 | # install flash-attention 2
25 | cd lightx2v/3rd/flash-attention && pip install --no-cache-dir -v -e .
26 |
27 | # install flash-attention 3 (only needed on Hopper GPUs)
28 | cd lightx2v/3rd/flash-attention/hopper && pip install --no-cache-dir -v -e .
29 | ```
30 |
31 | # Inference
32 |
33 | ```shell
34 | # Modify the path in the script
35 | bash scripts/run_wan_t2v.sh
36 | ```
37 |
38 | In addition to the input arguments already present in the script, there are some necessary parameters in the `${lightx2v_path}/configs/wan_t2v.json` file specified by `--config_json`; modify them as needed.
39 |
--------------------------------------------------------------------------------
/docs/zh_CN/02.start_server.md:
--------------------------------------------------------------------------------
1 | # How to Start the Service
2 |
3 | lightx2v provides an asynchronous service; the code entry point is [here](https://github.com/ModelTC/lightx2v/blob/main/lightx2v/api_server.py).
4 |
5 |
6 | ### Start the Service
7 |
8 | ```shell
9 | # Modify the path in the script
10 | bash scripts/start_server.sh
11 | ```
12 |
13 | The `--port 8000` argument binds the service to port `8000` on the local machine; you can change it as needed.
14 |
15 |
16 | ### Client Sends a Request
17 |
18 | ```shell
19 | python scripts/post.py
20 | ```
21 |
22 | The service endpoint is: `/v1/local/video/generate`
23 |
24 | The `message` parameter in `scripts/post.py` is as follows:
25 |
26 | ```python
27 | message = {
28 | "task_id": generate_task_id(),
29 | "task_id_must_unique": True,
30 | "prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
31 | "negative_prompt": "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
32 | "image_path": "",
33 | "save_video_path": "./output_lightx2v_wan_t2v_t02.mp4",
34 | }
35 | ```
36 |
37 | 1. `prompt`, `negative_prompt`, and `image_path` are the basic inputs for video generation; `image_path` can be an empty string, meaning no image input is required.
38 |
39 | 2. `save_video_path` is the path where the server saves the generated video. A relative path is resolved against the server's launch directory, so it is recommended to set an absolute path suited to your own environment.
40 |
41 | 3. `task_id` is the id of the task, formatted as a string. You can provide a custom string or call the `generate_task_id()` function to generate a random one. The task id is used to distinguish different video generation tasks.
42 |
43 | 4. `task_id_must_unique` indicates whether every `task_id` must be unique, i.e., whether duplicate `task_id`s are rejected. If set to `False`, there is no such constraint, and when a duplicate `task_id` is sent, the server's record for that task is overwritten by the newer task with the same `task_id`. If you do not need to keep all tasks for later queries, this can be set to `False`. A minimal request sketch is shown below.
44 |
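
For reference, here is a minimal request sketch in the spirit of `scripts/post.py`. The server address assumes the default `--port 8000`, and sending the message as a JSON POST body is an assumption; check `scripts/post.py` for the exact request shape.

```python
import uuid

import requests

SERVER = "http://localhost:8000"  # assumes the default --port 8000

message = {
    "task_id": uuid.uuid4().hex,  # or use the repo's generate_task_id() helper
    "task_id_must_unique": True,
    "prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
    "negative_prompt": "",
    "image_path": "",  # empty string means no image input
    "save_video_path": "/abs/path/to/output_lightx2v_wan_t2v.mp4",  # prefer an absolute path
}

# Assumption: the endpoint accepts the message as a JSON POST body.
resp = requests.post(f"{SERVER}/v1/local/video/generate", json=message)
print(resp.status_code, resp.text)
```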
45 |
46 | ### Client Checks the Server Status
47 |
48 | ```shell
49 | python scripts/check_status.py
50 | ```
51 |
52 | The service exposes the following endpoints:
53 |
54 | 1. `/v1/local/video/generate/service_status` checks the status of the service and returns whether it is `busy` or `idle`; the service accepts new requests only when it is `idle`.
55 |
56 | 2. `/v1/local/video/generate/get_all_tasks` returns all tasks the server has received and completed.
57 |
58 | 3. `/v1/local/video/generate/task_status` returns the status of the task with the given `task_id`, i.e., whether it is `processing` or `completed`. A polling sketch is shown below.
59 |
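
A hedged polling sketch built on the `task_status` endpoint described above. The query-parameter name and the response format are hypothetical; see `scripts/check_status.py` for the actual request shape.

```python
import time

import requests

SERVER = "http://localhost:8000"  # assumes the default --port 8000


def wait_until_completed(task_id: str, poll_seconds: float = 10.0) -> None:
    """Poll the task_status endpoint until the task is no longer processing."""
    while True:
        # Hypothetical GET with a task_id query parameter; the real script may differ.
        resp = requests.get(
            f"{SERVER}/v1/local/video/generate/task_status",
            params={"task_id": task_id},
            timeout=10,
        )
        body = resp.text
        print(body)
        if "completed" in body:  # the doc states tasks report processing/completed
            break
        time.sleep(poll_seconds)
```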
60 |
61 | ### Client Stops the Server's Current Task at Any Time
62 |
63 | ```shell
64 | python scripts/stop_running_task.py
65 | ```
66 |
67 | The service endpoint is: `/v1/local/video/generate/stop_running_task`
68 |
69 | After a task is stopped, the server does not shut down; it returns to waiting for new requests.
70 |
71 | ### Run Multiple Services on a Single Node
72 |
73 | On a single node, you can launch multiple services by running `scripts/start_server.sh` several times (make sure different services use different ports on the same IP), or start several services at once with `scripts/start_multi_servers.sh`:
74 |
75 | ```shell
76 | num_gpus=8 bash scripts/start_multi_servers.sh
77 | ```
78 |
79 | Here `num_gpus` is the number of services to start; the services will run on `num_gpus` consecutive ports beginning at `--start_port`.
80 |
81 |
82 | ### Scheduling Across Multiple Services
83 |
84 | ```shell
85 | python scripts/post_multi_servers.py
86 | ```
87 |
88 | `post_multi_servers.py` schedules multiple client requests according to each service's idle status, as in the sketch below.
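
A rough dispatch sketch in the same spirit. The port range and the idle check are assumptions (here `--start_port` 8000 with `num_gpus=8`); `scripts/post_multi_servers.py` is the authoritative implementation.

```python
import requests

# Assumes num_gpus=8 servers on consecutive ports starting at 8000.
SERVERS = [f"http://localhost:{port}" for port in range(8000, 8008)]


def find_idle_server() -> str | None:
    """Return the first server reporting idle via service_status, if any."""
    for server in SERVERS:
        try:
            resp = requests.get(f"{server}/v1/local/video/generate/service_status", timeout=5)
        except requests.RequestException:
            continue
        if "idle" in resp.text:  # loose check; the real response format may differ
            return server
    return None
```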
89 |
--------------------------------------------------------------------------------
/docs/zh_CN/03.quantization.md:
--------------------------------------------------------------------------------
1 | # Quantization
2 |
3 | lightx2v supports quantized inference for linear layers, with w8a8-int8 and w8a8-fp8 matrix multiplication.
4 |
5 |
6 | ### Run Quantized Inference
7 |
8 | ```shell
9 | # Modify the path in the script
10 | bash scripts/run_wan_t2v_save_quant.sh
11 | ```
12 |
13 | There are two execution commands in the script:
14 |
15 | #### Save Quantization Weights
16 |
17 | Set the `RUNNING_FLAG` environment variable to `save_naive_quant`, and point `--config_json` to this `json` file: `${lightx2v_path}/configs/wan_t2v_save_quant.json`. In this file, `quant_model_path` specifies the path where the quantized model will be saved.
18 |
19 | #### Load Quantization Weights and Run Inference
20 |
21 | Set the `RUNNING_FLAG` environment variable to `infer`, and point `--config_json` to the `json` file from the first step.
22 |
23 | ### Start the Quantization Service
24 |
25 | After the quantized weights have been saved, as in the loading step above, set the `RUNNING_FLAG` environment variable to `infer` and point `--config_json` to the `json` file from the first step.
26 |
27 | For example, modify the `scripts/start_server.sh` script as follows:
28 |
29 | ```shell
30 | export RUNNING_FLAG=infer
31 |
32 | python -m lightx2v.api_server \
33 | --model_cls wan2.1 \
34 | --task t2v \
35 | --model_path $model_path \
36 | --config_json ${lightx2v_path}/configs/wan_t2v_save_quant.json \
37 | --port 8000
38 | ```
39 |
--------------------------------------------------------------------------------
/examples/vae_trt/convert_trt.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export PYTHONPATH="./":$PYTHONPATH
3 | # onnx_path=""
4 | # trtexec \
5 | # --onnx=${onnx_path} \
6 | # --saveEngine="./vae_decoder_hf_sim.engine" \
7 | # --allowWeightStreaming \
8 | # --stronglyTyped \
9 | # --fp16 \
10 | # --weightStreamingBudget=100 \
11 | # --minShapes=inp:1x16x9x18x16 \
12 | # --optShapes=inp:1x16x17x32x16 \
13 | # --maxShapes=inp:1x16x17x32x32
14 |
15 | model_path=""
16 | python examples/vae_trt/convert_vae_trt_engine.py --model_path ${model_path}
17 |
--------------------------------------------------------------------------------
/examples/vae_trt/convert_vae_trt_engine.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import os
3 | import argparse
4 |
5 | import torch
6 | from loguru import logger
7 |
8 | from lightx2v.models.video_encoders.hf.autoencoder_kl_causal_3d.autoencoder_kl_causal_3d import AutoencoderKLCausal3D
9 | from lightx2v.models.video_encoders.trt.autoencoder_kl_causal_3d.trt_vae_infer import HyVaeTrtModelInfer
10 |
11 |
12 | def parse_args():
13 | args = argparse.ArgumentParser()
14 | args.add_argument("--model_path", help="", type=str)
15 | args.add_argument("--dtype", default=torch.float16)
16 | args.add_argument("--device", default="cuda", type=str)
17 | return args.parse_args()
18 |
19 |
20 | def convert_vae_trt_engine(args):
21 | vae_path = os.path.join(args.model_path, "hunyuan-video-t2v-720p/vae")
22 | assert Path(vae_path).exists(), f"{vae_path} not exists."
23 | config = AutoencoderKLCausal3D.load_config(vae_path)
24 | model = AutoencoderKLCausal3D.from_config(config)
25 | assert Path(os.path.join(vae_path, "pytorch_model.pt")).exists(), f"{os.path.join(vae_path, 'pytorch_model.pt')} not exists."
26 | ckpt = torch.load(os.path.join(vae_path, "pytorch_model.pt"), map_location="cpu", weights_only=True)
27 | model.load_state_dict(ckpt)
28 | model = model.to(dtype=args.dtype, device=args.device)
29 | onnx_path = HyVaeTrtModelInfer.export_to_onnx(model.decoder, vae_path)
30 | del model
31 | torch.cuda.empty_cache()
32 | engine_path = onnx_path.replace(".onnx", ".engine")
33 | HyVaeTrtModelInfer.convert_to_trt_engine(onnx_path, engine_path)
34 | logger.info(f"ONNX: {onnx_path}")
35 | logger.info(f"TRT Engine: {engine_path}")
36 | return
37 |
38 |
39 | def main():
40 | args = parse_args()
41 | convert_vae_trt_engine(args)
42 |
43 |
44 | if __name__ == "__main__":
45 | main()
46 |
--------------------------------------------------------------------------------
/lightx2v/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/__init__.py
--------------------------------------------------------------------------------
/lightx2v/attentions/__init__.py:
--------------------------------------------------------------------------------
1 | from lightx2v.attentions.common.torch_sdpa import torch_sdpa
2 | from lightx2v.attentions.common.flash_attn2 import flash_attn2
3 | from lightx2v.attentions.common.flash_attn3 import flash_attn3
4 | from lightx2v.attentions.common.sage_attn2 import sage_attn2
5 |
6 |
7 | def attention(attention_type="flash_attn2", *args, **kwargs):
8 | if attention_type == "torch_sdpa":
9 | return torch_sdpa(*args, **kwargs)
10 | elif attention_type == "flash_attn2":
11 | return flash_attn2(*args, **kwargs)
12 | elif attention_type == "flash_attn3":
13 | return flash_attn3(*args, **kwargs)
14 | elif attention_type == "sage_attn2":
15 | return sage_attn2(*args, **kwargs)
16 | else:
17 | raise NotImplementedError(f"Unsupported attention mode: {attention_type}")
18 |
--------------------------------------------------------------------------------
/lightx2v/attentions/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/attentions/common/__init__.py
--------------------------------------------------------------------------------
/lightx2v/attentions/common/flash_attn2.py:
--------------------------------------------------------------------------------
1 | try:
2 | from flash_attn.flash_attn_interface import flash_attn_varlen_func
3 | except ImportError:
4 | flash_attn_varlen_func = None
5 |
6 |
7 | def flash_attn2(q, k, v, cu_seqlens_q=None, cu_seqlens_kv=None, max_seqlen_q=None, max_seqlen_kv=None, model_cls=None):
8 | x = flash_attn_varlen_func(
9 | q,
10 | k,
11 | v,
12 | cu_seqlens_q,
13 | cu_seqlens_kv,
14 | max_seqlen_q,
15 | max_seqlen_kv,
16 | ).reshape(max_seqlen_q, -1)
17 | return x
18 |
--------------------------------------------------------------------------------
/lightx2v/attentions/common/flash_attn3.py:
--------------------------------------------------------------------------------
1 | try:
2 | from flash_attn_interface import flash_attn_varlen_func as flash_attn_varlen_func_v3
3 | except ImportError:
4 | flash_attn_varlen_func_v3 = None
5 |
6 |
7 | def flash_attn3(q, k, v, cu_seqlens_q=None, cu_seqlens_kv=None, max_seqlen_q=None, max_seqlen_kv=None, model_cls=None):
8 | x = flash_attn_varlen_func_v3(
9 | q,
10 | k,
11 | v,
12 | cu_seqlens_q,
13 | cu_seqlens_kv,
14 | max_seqlen_q,
15 | max_seqlen_kv,
16 | )[0].reshape(max_seqlen_q, -1)
17 | return x
18 |
--------------------------------------------------------------------------------
/lightx2v/attentions/common/sage_attn2.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | if torch.cuda.get_device_capability(0) == (8, 9):
4 | try:
5 | from sageattention import sageattn_qk_int8_pv_fp16_triton as sageattn
6 | except ImportError:
7 |         sageattn = None
8 | else:
9 | try:
10 | from sageattention import sageattn
11 | except ImportError:
12 | sageattn = None
13 |
14 |
15 | def sage_attn2(q, k, v, cu_seqlens_q=None, cu_seqlens_kv=None, max_seqlen_q=None, max_seqlen_kv=None, model_cls="hunyuan"):
16 | q, k, v = q.contiguous(), k.contiguous(), v.contiguous()
17 | if model_cls == "hunyuan":
18 | x1 = sageattn(
19 | q[: cu_seqlens_q[1]].unsqueeze(0),
20 | k[: cu_seqlens_kv[1]].unsqueeze(0),
21 | v[: cu_seqlens_kv[1]].unsqueeze(0),
22 | tensor_layout="NHD",
23 | )
24 | x2 = sageattn(
25 | q[cu_seqlens_q[1] :].unsqueeze(0),
26 | k[cu_seqlens_kv[1] :].unsqueeze(0),
27 | v[cu_seqlens_kv[1] :].unsqueeze(0),
28 | tensor_layout="NHD",
29 | )
30 | x = torch.cat((x1, x2), dim=1)
31 | x = x.view(max_seqlen_q, -1)
32 | elif model_cls in ["wan2.1", "wan2.1_causvid", "wan2.1_df"]:
33 | x = sageattn(
34 | q.unsqueeze(0),
35 | k.unsqueeze(0),
36 | v.unsqueeze(0),
37 | tensor_layout="NHD",
38 | )
39 | x = x.view(max_seqlen_q, -1)
40 | return x
41 |
--------------------------------------------------------------------------------
/lightx2v/attentions/common/torch_sdpa.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 |
5 | def torch_sdpa(
6 | q,
7 | k,
8 | v,
9 | drop_rate=0,
10 | attn_mask=None,
11 | causal=False,
12 | ):
13 | q = q.transpose(1, 2)
14 | k = k.transpose(1, 2)
15 | v = v.transpose(1, 2)
16 | if attn_mask is not None and attn_mask.dtype != torch.bool:
17 | attn_mask = attn_mask.to(q.dtype)
18 | x = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask, dropout_p=drop_rate, is_causal=causal)
19 | x = x.transpose(1, 2)
20 | b, s, a, d = x.shape
21 | out = x.reshape(b, s, -1)
22 | return out
23 |
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/attentions/distributed/__init__.py
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/comm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/attentions/distributed/comm/__init__.py
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/comm/all2all.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch._dynamo as dynamo
3 | import torch.distributed as dist
4 |
5 |
6 | @dynamo.disable
7 | def all2all_seq2head(input):
8 | """
9 | 将输入张量从 [seq_len/N, heads, hidden_dims] 转换为 [seq_len, heads/N, hidden_dims] 的格式。
10 |
11 | 参数:
12 | input (torch.Tensor): 输入张量,形状为 [seq_len/N, heads, hidden_dims]
13 |
14 | 返回:
15 | torch.Tensor: 转换后的输出张量,形状为 [seq_len, heads/N, hidden_dims]
16 | """
17 | # 确保输入是一个3D张量
18 | assert input.dim() == 3, f"input must be 3D tensor"
19 |
20 | # 获取当前进程的世界大小
21 | world_size = dist.get_world_size()
22 |
23 | # 获取输入张量的形状
24 | shard_seq_len, heads, hidden_dims = input.shape
25 | seq_len = shard_seq_len * world_size # 计算总序列长度
26 | shard_heads = heads // world_size # 计算每个进程处理的头数
27 |
28 | # 重塑输入张量以便进行 all-to-all 操作
29 | input_t = (
30 | input.reshape(shard_seq_len, world_size, shard_heads, hidden_dims) # 重塑为 [shard_seq_len, world_size, shard_heads, hidden_dims]
31 | .transpose(0, 1) # 转置以便进行 all-to-all 操作
32 | .contiguous() # 确保内存连续
33 | )
34 |
35 | # 创建一个与输入张量相同形状的输出张量
36 | output = torch.empty_like(input_t)
37 |
38 | # 执行 all-to-all 操作,将输入张量的内容分发到所有进程
39 | dist.all_to_all_single(output, input_t)
40 |
41 | # 重塑输出张量为 [seq_len, heads/N, hidden_dims] 形状
42 | output = output.reshape(seq_len, shard_heads, hidden_dims).contiguous()
43 |
44 | return output # 返回转换后的输出张量
45 |
46 |
47 | @dynamo.disable
48 | def all2all_head2seq(input):
49 |     """
50 |     Convert the input tensor from [seq_len, heads/N, hidden_dims] to [seq_len/N, heads, hidden_dims].
51 | 
52 |     Args:
53 |         input (torch.Tensor): input tensor of shape [seq_len, heads/N, hidden_dims]
54 | 
55 |     Returns:
56 |         torch.Tensor: converted output tensor of shape [seq_len/N, heads, hidden_dims]
57 |     """
58 |     # Ensure the input is a 3D tensor
59 |     assert input.dim() == 3, "input must be 3D tensor"
60 | 
61 |     # Get the world size of the process group
62 |     world_size = dist.get_world_size()
63 | 
64 |     # Read the input tensor's shape
65 |     seq_len, shard_heads, hidden_dims = input.shape
66 |     heads = shard_heads * world_size  # total number of heads
67 |     shard_seq_len = seq_len // world_size  # sequence length handled by each rank
68 | 
69 |     # Reshape the input tensor for the all-to-all operation
70 |     input_t = (
71 |         input.reshape(world_size, shard_seq_len, shard_heads, hidden_dims)  # reshape to [world_size, shard_seq_len, shard_heads, hidden_dims]
72 |         .transpose(1, 2)  # transpose so heads precede the sequence shard
73 |         .contiguous()  # ensure contiguous memory
74 |         .reshape(world_size, shard_heads, shard_seq_len, hidden_dims)  # reshape again to [world_size, shard_heads, shard_seq_len, hidden_dims]
75 |     )
76 | 
77 |     # Allocate an output tensor with the same shape as the input
78 |     output = torch.empty_like(input_t)
79 | 
80 |     # Run all-to-all to exchange the shards across all ranks
81 |     dist.all_to_all_single(output, input_t)
82 | 
83 |     # Reshape the output to [heads, shard_seq_len, hidden_dims]
84 |     output = output.reshape(heads, shard_seq_len, hidden_dims)
85 | 
86 |     # Transpose and reshape the output to [shard_seq_len, heads, hidden_dims]
87 |     output = output.transpose(0, 1).contiguous().reshape(shard_seq_len, heads, hidden_dims)
88 | 
89 |     return output  # return the converted tensor
90 |
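For reference, a round-trip sketch of the two transforms above (a sketch only; launch with torchrun --nproc_per_node=N, and heads must be divisible by the world size):

    import torch
    import torch.distributed as dist
    from lightx2v.attentions.distributed.comm.all2all import all2all_seq2head, all2all_head2seq

    dist.init_process_group(backend="nccl")
    rank = dist.get_rank()
    torch.cuda.set_device(rank)

    shard_seq_len, heads, head_dim = 128, 8, 64  # hypothetical sizes
    x = torch.randn(shard_seq_len, heads, head_dim, device="cuda")

    # seq-sharded -> head-sharded and back should reproduce this rank's original shard
    y = all2all_seq2head(x)  # [shard_seq_len * world_size, heads // world_size, head_dim]
    z = all2all_head2seq(y)  # [shard_seq_len, heads, head_dim]
    assert torch.equal(x, z)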
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/comm/ring_comm.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from loguru import logger
3 | import torch
4 | import torch.distributed as dist
5 |
6 |
7 | class RingComm:
8 | def __init__(self, process_group: dist.ProcessGroup = None):
9 | self._process_group = process_group
10 | self._ops = []
11 | self.rank = dist.get_rank(self._process_group)
12 | self.world_size = dist.get_world_size(self._process_group)
13 | self._reqs = None
14 |
15 | self.send_rank = (self.rank + 1) % self.world_size
16 | self.recv_rank = (self.rank - 1) % self.world_size
17 |
18 | if process_group is not None:
19 | self.send_rank = dist.get_global_rank(self._process_group, self.send_rank)
20 | self.recv_rank = dist.get_global_rank(self._process_group, self.recv_rank)
21 |
22 | def send_recv(self, to_send: torch.Tensor, recv_tensor: Optional[torch.Tensor] = None) -> torch.Tensor:
23 | if recv_tensor is None:
24 | res = torch.empty_like(to_send)
25 | # logger.info(f"send_recv: empty_like {to_send.shape}")
26 | else:
27 | res = recv_tensor
28 |
29 | send_op = dist.P2POp(dist.isend, to_send, self.send_rank, group=self._process_group)
30 | recv_op = dist.P2POp(dist.irecv, res, self.recv_rank, group=self._process_group)
31 | self._ops.append(send_op)
32 | self._ops.append(recv_op)
33 | return res
34 |
35 | def commit(self):
36 | if self._reqs is not None:
37 | raise RuntimeError("commit called twice")
38 | self._reqs = dist.batch_isend_irecv(self._ops)
39 |
40 | def wait(self):
41 | if self._reqs is None:
42 | raise RuntimeError("wait called before commit")
43 | for req in self._reqs:
44 | req.wait()
45 | self._reqs = None
46 | self._ops = []
47 |
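A minimal sketch of the intended queue/commit/wait pattern (launch with torchrun --nproc_per_node=N; NCCL backend assumed):

    import torch
    import torch.distributed as dist
    from lightx2v.attentions.distributed.comm.ring_comm import RingComm

    dist.init_process_group(backend="nccl")
    rank = dist.get_rank()
    torch.cuda.set_device(rank)

    comm = RingComm()
    block = torch.full((4, 4), float(rank), device="cuda")

    # Each step sends the current block to the next rank and receives one from the
    # previous rank; after world_size - 1 steps every rank has seen every block.
    for _ in range(comm.world_size - 1):
        next_block = comm.send_recv(block)
        comm.commit()
        comm.wait()
        block = next_block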
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/partial_heads_attn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/attentions/distributed/partial_heads_attn/__init__.py
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/partial_heads_attn/attn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.distributed as dist
3 | from lightx2v.attentions import attention
4 |
5 |
6 | def partial_heads_attn(attention_type, q, k, v, cu_seqlens_qkv, max_seqlen_qkv):
7 | num_heads = q.shape[-2]
8 | cur_rank = dist.get_rank()
9 | world_size = dist.get_world_size()
10 | num_chunk_heads = int(num_heads / dist.get_world_size())
11 |
12 | if cur_rank == world_size - 1:
13 | q = q[:, num_chunk_heads * cur_rank :, :]
14 | k = k[:, num_chunk_heads * cur_rank :, :]
15 | v = v[:, num_chunk_heads * cur_rank :, :]
16 | else:
17 | q = q[:, num_chunk_heads * cur_rank : num_chunk_heads * (cur_rank + 1), :]
18 | k = k[:, num_chunk_heads * cur_rank : num_chunk_heads * (cur_rank + 1), :]
19 | v = v[:, num_chunk_heads * cur_rank : num_chunk_heads * (cur_rank + 1), :]
20 |
21 | output = attention(
22 | attention_type=attention_type,
23 | q=q,
24 | k=k,
25 | v=v,
26 | cu_seqlens_q=cu_seqlens_qkv,
27 | cu_seqlens_kv=cu_seqlens_qkv,
28 | max_seqlen_q=max_seqlen_qkv,
29 | max_seqlen_kv=max_seqlen_qkv,
30 | )
31 |
32 | gathered_outputs = [torch.empty_like(output) for _ in range(world_size)]
33 | dist.all_gather(gathered_outputs, output)
34 |
35 | combined_output = torch.cat(gathered_outputs, dim=1)
36 |
37 | return combined_output
38 |
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/partial_heads_attn/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/attentions/distributed/partial_heads_attn/tests/__init__.py
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/partial_heads_attn/tests/test.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=/workspace/lightx2v:$PYTHONPATH
2 | export CUDA_VISIBLE_DEVICES=0,1
3 | torchrun --nproc_per_node=2 test_acc.py
4 |
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/partial_heads_attn/tests/test_acc.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.distributed as dist
3 | from lightx2v.attentions import attention
4 | from lightx2v.utils.utils import seed_all
5 | from loguru import logger
6 |
7 |
8 | seed_all(42)
9 |
10 |
11 | def prepare_tensors():
12 |     cur_rank = dist.get_rank()  # rank of the current process
13 |     torch.cuda.set_device(cur_rank)  # bind this process to its own CUDA device
14 | q = torch.randn(32656, 24, 128, dtype=torch.bfloat16).cuda()
15 | k = torch.randn(32656, 24, 128, dtype=torch.bfloat16).cuda()
16 | v = torch.randn(32656, 24, 128, dtype=torch.bfloat16).cuda()
17 |
18 | cu_seqlens_qkv = torch.tensor([0, 32411, 32656], dtype=torch.int32).cuda()
19 | max_seqlen_qkv = 32656
20 | return q, k, v, cu_seqlens_qkv, max_seqlen_qkv
21 |
22 |
23 | def test_part_head():
24 | q, k, v, cu_seqlens_qkv, max_seqlen_qkv = prepare_tensors()
25 |
26 |     # First compute the full (unsharded) result as the reference
27 | single_gpu_output = attention(
28 | q=q,
29 | k=k,
30 | v=v,
31 | cu_seqlens_q=cu_seqlens_qkv,
32 | cu_seqlens_kv=cu_seqlens_qkv,
33 | max_seqlen_q=max_seqlen_qkv,
34 | max_seqlen_kv=max_seqlen_qkv,
35 | )
36 |
37 | num_heads = q.shape[-2]
38 | cur_rank = dist.get_rank()
39 | world_size = dist.get_world_size()
40 | num_chunk_heads = int(num_heads / dist.get_world_size())
41 |
42 | if cur_rank == world_size - 1:
43 | q = q[:, num_chunk_heads * cur_rank :, :]
44 | k = k[:, num_chunk_heads * cur_rank :, :]
45 | v = v[:, num_chunk_heads * cur_rank :, :]
46 | else:
47 | q = q[:, num_chunk_heads * cur_rank : num_chunk_heads * (cur_rank + 1), :]
48 | k = k[:, num_chunk_heads * cur_rank : num_chunk_heads * (cur_rank + 1), :]
49 | v = v[:, num_chunk_heads * cur_rank : num_chunk_heads * (cur_rank + 1), :]
50 |
51 | output = attention(
52 | q=q,
53 | k=k,
54 | v=v,
55 | cu_seqlens_q=cu_seqlens_qkv,
56 | cu_seqlens_kv=cu_seqlens_qkv,
57 | max_seqlen_q=max_seqlen_qkv,
58 | max_seqlen_kv=max_seqlen_qkv,
59 | )
60 |
61 | gathered_outputs = [torch.empty_like(output) for _ in range(world_size)]
62 | dist.all_gather(gathered_outputs, output)
63 |
64 | combined_output = torch.cat(gathered_outputs, dim=1)
65 |
66 |     # Verify result consistency against the single-GPU reference
67 |     if cur_rank == 0:
68 |         # import pdb; pdb.set_trace()
69 |         logger.info("Outputs match: {}", torch.allclose(single_gpu_output, combined_output, rtol=1e-3, atol=1e-3))
70 | 
71 |     # # Verify result consistency on every rank
72 |     # logger.info("Outputs match: {}", torch.allclose(single_gpu_output, combined_output, rtol=1e-3, atol=1e-3))
73 |
74 |
75 | if __name__ == "__main__":
76 |     # Initialize the distributed environment
77 | dist.init_process_group(backend="nccl")
78 | test_part_head()
79 |
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/partial_heads_attn/wrap.py:
--------------------------------------------------------------------------------
1 | from lightx2v.attentions.distributed.partial_heads_attn.attn import partial_heads_attn
2 |
3 |
4 | def parallelize_hunyuan(hunyuan_model):
5 | hunyuan_model.transformer_infer.parallel_attention = partial_heads_attn
6 |
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/ring/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/attentions/distributed/ring/__init__.py
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/ring/tests/test.sh:
--------------------------------------------------------------------------------
1 | lightx2v_path=""
2 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
3 | python3 test.py
4 |
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/ring/wrap.py:
--------------------------------------------------------------------------------
1 | import functools
2 | from lightx2v.attentions.distributed.ring.attn import ring_attn
3 |
4 |
5 | def parallelize_hunyuan(hunyuan_model):
6 | from lightx2v.attentions.distributed.utils.hunyuan.processor import pre_process, post_process
7 |
8 |     """Parallelize the Hunyuan model's inference using ring attention.
9 | 
10 |     Args:
11 |         hunyuan_model: Hunyuan model instance, providing the inference method and related attributes.
12 |     """
13 |     # Replace the Hunyuan model's parallel attention with ring attention
14 |     hunyuan_model.transformer_infer.parallel_attention = ring_attn
15 | 
16 |     # Keep a reference to the original inference method for later calls
17 |     original_infer = hunyuan_model.infer
18 | 
19 |     @functools.wraps(hunyuan_model.__class__.infer)  # preserve the original method's metadata
20 |     def new_infer(self, text_encoders_output, image_encoder_output, args):
21 |         """Wrapped inference: shard the inputs, call the original method, then gather the output.
22 | 
23 |         Args:
24 |             self: Hunyuan model instance
25 |             text_encoders_output: output of the text encoders
26 |             args: remaining arguments
27 | 
28 |         Returns:
29 |             None
30 |         """
31 |         # Save the original latents and rotary frequency tensors
32 |         self.scheduler.ori_latents, self.scheduler.ori_freqs_cos, self.scheduler.ori_freqs_sin = (self.scheduler.latents, self.scheduler.freqs_cos, self.scheduler.freqs_sin)
33 | 
34 |         # Shard the inputs for parallel computation
35 |         self.scheduler.latents, self.scheduler.freqs_cos, self.scheduler.freqs_sin, split_dim = pre_process(self.scheduler.latents, self.scheduler.freqs_cos, self.scheduler.freqs_sin)
36 | 
37 |         # Run the original inference method
38 |         original_infer(text_encoders_output, image_encoder_output, args)
39 | 
40 |         # Gather and merge the sharded output
41 |         self.scheduler.noise_pred = post_process(self.scheduler.noise_pred, split_dim)
42 | 
43 |         # Restore the original latents and rotary frequency tensors
44 |         self.scheduler.latents, self.scheduler.freqs_cos, self.scheduler.freqs_sin = (self.scheduler.ori_latents, self.scheduler.ori_freqs_cos, self.scheduler.ori_freqs_sin)
45 | 
46 |         # return combined_output  # returning the processed output is currently disabled
47 | 
48 |     # Bind the wrapped inference method to the Hunyuan model instance
49 |     new_infer = new_infer.__get__(hunyuan_model)
50 |     hunyuan_model.infer = new_infer  # replace the original inference method
51 |
52 |
53 | def parallelize_wan(wan_model):
54 | from lightx2v.attentions.distributed.utils.wan.processor import pre_process, post_process
55 |
56 | wan_model.transformer_infer.parallel_attention = ring_attn
57 |
58 | original_infer = wan_model.transformer_infer.infer
59 |
60 |     @functools.wraps(wan_model.transformer_infer.__class__.infer)  # preserve the original method's metadata
61 | def new_infer(self, weights, grid_sizes, embed, x, embed0, seq_lens, freqs, context):
62 | x = pre_process(x)
63 |
64 | x = original_infer(weights, grid_sizes, embed, x, embed0, seq_lens, freqs, context)
65 |
66 | x = post_process(x)
67 |
68 | return x
69 |
70 | new_infer = new_infer.__get__(wan_model.transformer_infer)
71 |     wan_model.transformer_infer.infer = new_infer  # replace the original inference method
72 |
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/ulysses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/attentions/distributed/ulysses/__init__.py
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/ulysses/wrap.py:
--------------------------------------------------------------------------------
1 | import functools
2 | from lightx2v.attentions.distributed.ulysses.attn import ulysses_attn
3 |
4 |
5 | def parallelize_hunyuan(hunyuan_model):
6 | from lightx2v.attentions.distributed.utils.hunyuan.processor import pre_process, post_process
7 |
8 |     """Parallelize the Hunyuan model's inference using Ulysses attention.
9 | 
10 |     Args:
11 |         hunyuan_model: Hunyuan model instance, providing the inference method and related attributes.
12 |     """
13 |     # Replace the Hunyuan model's parallel attention with Ulysses attention
14 |     hunyuan_model.transformer_infer.parallel_attention = ulysses_attn
15 | 
16 |     # Keep a reference to the original inference method for later calls
17 |     original_infer = hunyuan_model.infer
18 | 
19 |     @functools.wraps(hunyuan_model.__class__.infer)  # preserve the original method's metadata
20 |     def new_infer(self, text_encoders_output, image_encoder_output, args):
21 |         """Wrapped inference: shard the inputs, call the original method, then gather the output.
22 | 
23 |         Args:
24 |             self: Hunyuan model instance
25 |             text_encoders_output: output of the text encoders
26 |             args: remaining arguments
27 | 
28 |         Returns:
29 |             None
30 |         """
31 |         # Save the original latents and rotary frequency tensors
32 |         self.scheduler.ori_latents, self.scheduler.ori_freqs_cos, self.scheduler.ori_freqs_sin = (self.scheduler.latents, self.scheduler.freqs_cos, self.scheduler.freqs_sin)
33 | 
34 |         # Shard the inputs for parallel computation
35 |         self.scheduler.latents, self.scheduler.freqs_cos, self.scheduler.freqs_sin, split_dim = pre_process(self.scheduler.latents, self.scheduler.freqs_cos, self.scheduler.freqs_sin)
36 | 
37 |         # Run the original inference method
38 |         original_infer(text_encoders_output, image_encoder_output, args)
39 | 
40 |         # Gather and merge the sharded output
41 |         self.scheduler.noise_pred = post_process(self.scheduler.noise_pred, split_dim)
42 | 
43 |         # Restore the original latents and rotary frequency tensors
44 |         self.scheduler.latents, self.scheduler.freqs_cos, self.scheduler.freqs_sin = (self.scheduler.ori_latents, self.scheduler.ori_freqs_cos, self.scheduler.ori_freqs_sin)
45 | 
46 |         # return combined_output  # returning the processed output is currently disabled
47 | 
48 |     # Bind the wrapped inference method to the Hunyuan model instance
49 |     new_infer = new_infer.__get__(hunyuan_model)
50 |     hunyuan_model.infer = new_infer  # replace the original inference method
51 |
52 |
53 | def parallelize_wan(wan_model):
54 | from lightx2v.attentions.distributed.utils.wan.processor import pre_process, post_process
55 |
56 | wan_model.transformer_infer.parallel_attention = ulysses_attn
57 |
58 | original_infer = wan_model.transformer_infer.infer
59 |
60 |     @functools.wraps(wan_model.transformer_infer.__class__.infer)  # preserve the original method's metadata
61 | def new_infer(self, weights, grid_sizes, embed, x, embed0, seq_lens, freqs, context):
62 | x = pre_process(x)
63 |
64 | x = original_infer(weights, grid_sizes, embed, x, embed0, seq_lens, freqs, context)
65 |
66 | x = post_process(x)
67 |
68 | return x
69 |
70 | new_infer = new_infer.__get__(wan_model.transformer_infer)
71 |     wan_model.transformer_infer.infer = new_infer  # replace the original inference method
72 |
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/attentions/distributed/utils/__init__.py
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/utils/hunyuan/processor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.distributed as dist
3 |
4 |
5 | def pre_process(latent_model_input, freqs_cos, freqs_sin):
6 |     """
7 |     Pre-process the latent input and rotary frequency tensors by sharding them for distributed computation.
8 | 
9 |     Args:
10 |         latent_model_input (torch.Tensor): latent input of shape [batch_size, channels, temporal_size, height, width]
11 |         freqs_cos (torch.Tensor): cosine rotary frequencies
12 |         freqs_sin (torch.Tensor): sine rotary frequencies
13 | 
14 |     Returns:
15 |         tuple: sharded latent_model_input, freqs_cos, freqs_sin and the split dimension split_dim
16 |     """
17 |     # Get the world size and the rank of the current process
18 |     world_size = dist.get_world_size()
19 |     cur_rank = dist.get_rank()
20 | 
21 |     # Choose the split dimension based on the input shape
22 |     if latent_model_input.shape[-2] // 2 % world_size == 0:
23 |         split_dim = -2  # split along the height
24 |     elif latent_model_input.shape[-1] // 2 % world_size == 0:
25 |         split_dim = -1  # split along the width
26 |     else:
27 |         raise ValueError(f"Cannot split video sequence into world size ({world_size}) parts evenly")
28 | 
29 |     # Temporal size and the patchified height and width
30 |     temporal_size, h, w = latent_model_input.shape[2], latent_model_input.shape[3] // 2, latent_model_input.shape[4] // 2
31 | 
32 |     # Shard the latent input along the chosen dimension
33 |     latent_model_input = torch.chunk(latent_model_input, world_size, dim=split_dim)[cur_rank]
34 | 
35 |     # Shard the cosine frequencies
36 |     dim_thw = freqs_cos.shape[-1]  # last dimension of the frequency tensor
37 |     freqs_cos = freqs_cos.reshape(temporal_size, h, w, dim_thw)  # reshape to [temporal_size, height, width, dim_thw]
38 |     freqs_cos = torch.chunk(freqs_cos, world_size, dim=split_dim - 1)[cur_rank]  # take this rank's shard
39 |     freqs_cos = freqs_cos.reshape(-1, dim_thw)  # flatten back to [tokens, dim_thw]
40 | 
41 |     # Shard the sine frequencies
42 |     dim_thw = freqs_sin.shape[-1]  # last dimension of the frequency tensor
43 |     freqs_sin = freqs_sin.reshape(temporal_size, h, w, dim_thw)  # reshape to [temporal_size, height, width, dim_thw]
44 |     freqs_sin = torch.chunk(freqs_sin, world_size, dim=split_dim - 1)[cur_rank]  # take this rank's shard
45 |     freqs_sin = freqs_sin.reshape(-1, dim_thw)  # flatten back to [tokens, dim_thw]
46 | 
47 |     return latent_model_input, freqs_cos, freqs_sin, split_dim  # return the sharded tensors
48 |
49 |
50 | def post_process(output, split_dim):
51 |     """Post-process the output by gathering the shards from all processes and concatenating them.
52 | 
53 |     Args:
54 |         output (torch.Tensor): the current process's output shard
55 |         split_dim (int): dimension along which the shards are concatenated
56 | 
57 |     Returns:
58 |         torch.Tensor: the combined output
59 |     """
60 |     # Get the world size
61 |     world_size = dist.get_world_size()
62 | 
63 |     # Buffers that will hold the output of every process
64 |     gathered_outputs = [torch.empty_like(output) for _ in range(world_size)]
65 | 
66 |     # Gather the outputs from all processes
67 |     dist.all_gather(gathered_outputs, output)
68 | 
69 |     # Concatenate the gathered outputs along the split dimension
70 |     combined_output = torch.cat(gathered_outputs, dim=split_dim)
71 | 
72 |     return combined_output  # return the combined output
73 |
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/utils/process.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.distributed as dist
3 |
4 |
5 | def pre_process(latent_model_input, freqs_cos, freqs_sin):
6 |     """
7 |     Pre-process the latent input and rotary frequency tensors by sharding them for distributed computation.
8 | 
9 |     Args:
10 |         latent_model_input (torch.Tensor): latent input of shape [batch_size, channels, temporal_size, height, width]
11 |         freqs_cos (torch.Tensor): cosine rotary frequencies
12 |         freqs_sin (torch.Tensor): sine rotary frequencies
13 | 
14 |     Returns:
15 |         tuple: sharded latent_model_input, freqs_cos, freqs_sin and the split dimension split_dim
16 |     """
17 |     # Get the world size and the rank of the current process
18 |     world_size = dist.get_world_size()
19 |     cur_rank = dist.get_rank()
20 | 
21 |     # Choose the split dimension based on the input shape
22 |     if latent_model_input.shape[-2] // 2 % world_size == 0:
23 |         split_dim = -2  # split along the height
24 |     elif latent_model_input.shape[-1] // 2 % world_size == 0:
25 |         split_dim = -1  # split along the width
26 |     else:
27 |         raise ValueError(f"Cannot split video sequence into world size ({world_size}) parts evenly")
28 | 
29 |     # Temporal size and the patchified height and width
30 |     temporal_size, h, w = latent_model_input.shape[2], latent_model_input.shape[3] // 2, latent_model_input.shape[4] // 2
31 | 
32 |     # Shard the latent input along the chosen dimension
33 |     latent_model_input = torch.chunk(latent_model_input, world_size, dim=split_dim)[cur_rank]
34 | 
35 |     # Shard the cosine frequencies
36 |     dim_thw = freqs_cos.shape[-1]  # last dimension of the frequency tensor
37 |     freqs_cos = freqs_cos.reshape(temporal_size, h, w, dim_thw)  # reshape to [temporal_size, height, width, dim_thw]
38 |     freqs_cos = torch.chunk(freqs_cos, world_size, dim=split_dim - 1)[cur_rank]  # take this rank's shard
39 |     freqs_cos = freqs_cos.reshape(-1, dim_thw)  # flatten back to [tokens, dim_thw]
40 | 
41 |     # Shard the sine frequencies
42 |     dim_thw = freqs_sin.shape[-1]  # last dimension of the frequency tensor
43 |     freqs_sin = freqs_sin.reshape(temporal_size, h, w, dim_thw)  # reshape to [temporal_size, height, width, dim_thw]
44 |     freqs_sin = torch.chunk(freqs_sin, world_size, dim=split_dim - 1)[cur_rank]  # take this rank's shard
45 |     freqs_sin = freqs_sin.reshape(-1, dim_thw)  # flatten back to [tokens, dim_thw]
46 | 
47 |     return latent_model_input, freqs_cos, freqs_sin, split_dim  # return the sharded tensors
48 |
49 |
50 | def post_process(output, split_dim):
51 |     """Post-process the output by gathering the shards from all processes and concatenating them.
52 | 
53 |     Args:
54 |         output (torch.Tensor): the current process's output shard
55 |         split_dim (int): dimension along which the shards are concatenated
56 | 
57 |     Returns:
58 |         torch.Tensor: the combined output
59 |     """
60 |     # Get the world size
61 |     world_size = dist.get_world_size()
62 | 
63 |     # Buffers that will hold the output of every process
64 |     gathered_outputs = [torch.empty_like(output) for _ in range(world_size)]
65 | 
66 |     # Gather the outputs from all processes
67 |     dist.all_gather(gathered_outputs, output)
68 | 
69 |     # Concatenate the gathered outputs along the split dimension
70 |     combined_output = torch.cat(gathered_outputs, dim=split_dim)
71 | 
72 |     return combined_output  # return the combined output
73 |
--------------------------------------------------------------------------------
/lightx2v/attentions/distributed/utils/wan/processor.py:
--------------------------------------------------------------------------------
1 | from re import split
2 | import torch
3 | import torch.distributed as dist
4 | import torch.nn.functional as F
5 |
6 | PADDING_SIZE = None
7 |
8 |
9 | def pre_process(x):
10 | world_size = dist.get_world_size()
11 | cur_rank = dist.get_rank()
12 |
13 | padding_size = (world_size - (x.shape[0] % world_size)) % world_size
14 |
15 | if padding_size > 0:
16 |         # Pad the leading (token) dimension with F.pad so the sequence splits evenly across ranks
17 |         x = F.pad(x, (0, 0, 0, padding_size))  # pad spec runs from the last dim forward: (0, 0) last dim, (0, padding_size) first dim
18 |
19 | x = torch.chunk(x, world_size, dim=0)[cur_rank]
20 |
21 | return x
22 |
23 |
24 | def post_process(x):
25 |     # Get the world size
26 |     world_size = dist.get_world_size()
27 | 
28 |     # Buffers that will hold the output of every process
29 |     gathered_x = [torch.empty_like(x) for _ in range(world_size)]
30 | 
31 |     # Gather the outputs from all processes
32 |     dist.all_gather(gathered_x, x)
33 | 
34 |     # Concatenate the gathered shards along the sequence dimension
35 |     combined_output = torch.cat(gathered_x, dim=0)
36 | 
37 |     return combined_output  # return the combined output
38 |
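A sketch of the padding/shard behavior above (hypothetical sizes; launch with torchrun): pre_process pads the token dimension up to a multiple of the world size and keeps only the current rank's chunk, and post_process gathers the chunks back, so the gathered length is the padded length:

    import torch
    import torch.distributed as dist
    from lightx2v.attentions.distributed.utils.wan.processor import pre_process, post_process

    dist.init_process_group(backend="nccl")
    rank = dist.get_rank()
    torch.cuda.set_device(rank)
    world_size = dist.get_world_size()

    x = torch.randn(1000, 1536, device="cuda")  # hypothetical [tokens, hidden] activation

    shard = pre_process(x)          # this rank's chunk of the padded sequence
    gathered = post_process(shard)  # all chunks concatenated back along dim 0

    padded_len = ((1000 + world_size - 1) // world_size) * world_size
    assert shard.shape[0] == padded_len // world_size
    assert gathered.shape[0] == padded_len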
--------------------------------------------------------------------------------
/lightx2v/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/common/__init__.py
--------------------------------------------------------------------------------
/lightx2v/common/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/common/modules/__init__.py
--------------------------------------------------------------------------------
/lightx2v/common/offload/manager.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class WeightAsyncStreamManager(object):
5 | def __init__(self, blocks_num, offload_ratio=1, phases_num=1):
6 |         # Three slots: [0] weights being computed, [1] previous weights pending offload, [2] prefetched weights
7 |         self.active_weights = [None for _ in range(3)]
8 | self.compute_stream = torch.cuda.Stream(priority=-1)
9 | self.cpu_load_stream = torch.cuda.Stream(priority=0)
10 | self.cuda_load_stream = torch.cuda.Stream(priority=0)
11 | self.offload_block_num = offload_ratio * blocks_num
12 | self.phases_num = phases_num
13 | self.offload_phases_num = blocks_num * phases_num * offload_ratio
14 |
15 | def prefetch_weights(self, block_idx, blocks_weights):
16 | with torch.cuda.stream(self.cuda_load_stream):
17 | self.active_weights[2] = blocks_weights[block_idx]
18 | self.active_weights[2].to_cuda_async()
19 | with torch.cuda.stream(self.cpu_load_stream):
20 | if block_idx < self.offload_block_num:
21 | if self.active_weights[1] is not None:
22 | self.active_weights[1].to_cpu_async()
23 |
24 | def swap_weights(self):
25 | self.compute_stream.synchronize()
26 | self.cpu_load_stream.synchronize()
27 | self.cuda_load_stream.synchronize()
28 |
29 | self.active_weights[0], self.active_weights[1] = (
30 | self.active_weights[2],
31 | self.active_weights[0],
32 | )
33 |
34 | def prefetch_phase(self, block_idx, phase_idx, blocks):
35 | with torch.cuda.stream(self.cuda_load_stream):
36 | new_phase = blocks[block_idx].compute_phases[phase_idx]
37 | new_phase.to_cuda_async()
38 | self.active_weights[2] = (phase_idx, blocks[block_idx].compute_phases[phase_idx])
39 | with torch.cuda.stream(self.cpu_load_stream):
40 | if block_idx * self.phases_num + phase_idx < self.offload_phases_num:
41 | if self.active_weights[1] is not None:
42 | _, old_phase = self.active_weights[1]
43 | old_phase.to_cpu_async()
44 |
45 | def swap_phases(self):
46 | self.compute_stream.synchronize()
47 | self.cpu_load_stream.synchronize()
48 | self.cuda_load_stream.synchronize()
49 | self.active_weights[0], self.active_weights[1] = self.active_weights[2], self.active_weights[0]
50 |
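The manager is meant to drive a software pipeline: while block i runs on compute_stream, block i+1 is prefetched to GPU and an older block is copied back to CPU. A rough sketch of that loop, assuming blocks is a list of weight objects exposing to_cuda_async()/to_cpu_async() and compute() stands in for the real transformer-block forward:

    import torch
    from lightx2v.common.offload.manager import WeightAsyncStreamManager

    def run_blocks(blocks, compute):
        manager = WeightAsyncStreamManager(blocks_num=len(blocks), offload_ratio=1)

        # Load the first block up front so there is something to compute on.
        blocks[0].to_cuda_async()
        torch.cuda.synchronize()
        manager.active_weights[0] = blocks[0]

        for idx in range(len(blocks)):
            if idx + 1 < len(blocks):
                # Start copying the next block to GPU on the side streams.
                manager.prefetch_weights(idx + 1, blocks)
            with torch.cuda.stream(manager.compute_stream):
                compute(manager.active_weights[0])
            # Wait for compute and copies, then rotate current/previous/prefetched slots.
            manager.swap_weights()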
--------------------------------------------------------------------------------
/lightx2v/common/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .mm import *
2 | from .norm import *
3 | from .conv import *
4 | from .tensor import *
5 | from .attn import *
6 |
--------------------------------------------------------------------------------
/lightx2v/common/ops/attn/__init__.py:
--------------------------------------------------------------------------------
1 | from .attn_weight import *
2 |
--------------------------------------------------------------------------------
/lightx2v/common/ops/conv/__init__.py:
--------------------------------------------------------------------------------
1 | from .conv2d import *
2 | from .conv3d import *
3 |
--------------------------------------------------------------------------------
/lightx2v/common/ops/conv/conv2d.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from abc import ABCMeta, abstractmethod
3 | from lightx2v.utils.registry_factory import CONV2D_WEIGHT_REGISTER
4 |
5 |
6 | class Conv2dWeightTemplate(metaclass=ABCMeta):
7 | def __init__(self, weight_name, bias_name, stride, padding, dilation, groups):
8 | self.weight_name = weight_name
9 | self.bias_name = bias_name
10 | self.stride = stride
11 | self.padding = padding
12 | self.dilation = dilation
13 | self.groups = groups
14 | self.config = {}
15 |
16 | @abstractmethod
17 | def load(self, weight_dict):
18 | pass
19 |
20 | @abstractmethod
21 | def apply(self, input_tensor):
22 | pass
23 |
24 | def set_config(self, config=None):
25 | if config is not None:
26 | self.config = config
27 |
28 |
29 | @CONV2D_WEIGHT_REGISTER("Default")
30 | class Conv2dWeight(Conv2dWeightTemplate):
31 | def __init__(self, weight_name, bias_name, stride=1, padding=0, dilation=1, groups=1):
32 | super().__init__(weight_name, bias_name, stride, padding, dilation, groups)
33 |
34 | def load(self, weight_dict):
35 | self.weight = weight_dict[self.weight_name].cuda()
36 | self.bias = weight_dict[self.bias_name].cuda() if self.bias_name is not None else None
37 |
38 | def apply(self, input_tensor):
39 | input_tensor = torch.nn.functional.conv2d(input_tensor, weight=self.weight, bias=self.bias, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups)
40 | return input_tensor
41 |
42 | def to_cpu(self, non_blocking=False):
43 | self.weight = self.weight.cpu(non_blocking=non_blocking)
44 | if self.bias is not None:
45 | self.bias = self.bias.cpu(non_blocking=non_blocking)
46 |
47 | def to_cuda(self, non_blocking=False):
48 | self.weight = self.weight.cuda(non_blocking=non_blocking)
49 | if self.bias is not None:
50 | self.bias = self.bias.cuda(non_blocking=non_blocking)
51 |
52 | def state_dict(self, destination=None):
53 | if destination is None:
54 | destination = {}
55 | destination[self.weight_name] = self.weight.cpu().detach().clone()
56 | if self.bias is not None:
57 | destination[self.bias_name] = self.bias.cpu().detach().clone()
58 | return destination
59 |
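An illustrative load/apply sketch with made-up weight names (not part of the file above):

    import torch
    from lightx2v.utils.registry_factory import CONV2D_WEIGHT_REGISTER

    # Hypothetical state dict holding a 3x3 conv layer.
    weight_dict = {
        "img_in.proj.weight": torch.randn(64, 3, 3, 3),
        "img_in.proj.bias": torch.randn(64),
    }

    conv = CONV2D_WEIGHT_REGISTER["Default"]("img_in.proj.weight", "img_in.proj.bias", stride=1, padding=1)
    conv.load(weight_dict)  # moves the tensors to CUDA

    x = torch.randn(1, 3, 64, 64, device="cuda")
    y = conv.apply(x)
    print(y.shape)  # torch.Size([1, 64, 64, 64])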
--------------------------------------------------------------------------------
/lightx2v/common/ops/conv/conv3d.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from abc import ABCMeta, abstractmethod
3 | from lightx2v.utils.registry_factory import CONV3D_WEIGHT_REGISTER
4 |
5 |
6 | class Conv3dWeightTemplate(metaclass=ABCMeta):
7 | def __init__(self, weight_name, bias_name, stride=1, padding=0, dilation=1, groups=1):
8 | self.weight_name = weight_name
9 | self.bias_name = bias_name
10 | self.stride = stride
11 | self.padding = padding
12 | self.dilation = dilation
13 | self.groups = groups
14 | self.config = {}
15 |
16 | @abstractmethod
17 | def load(self, weight_dict):
18 | pass
19 |
20 | @abstractmethod
21 | def apply(self, input_tensor):
22 | pass
23 |
24 | def set_config(self, config=None):
25 | if config is not None:
26 | self.config = config
27 |
28 |
29 | @CONV3D_WEIGHT_REGISTER("Default")
30 | class Conv3dWeight(Conv3dWeightTemplate):
31 | def __init__(self, weight_name, bias_name, stride=1, padding=0, dilation=1, groups=1):
32 | super().__init__(weight_name, bias_name, stride, padding, dilation, groups)
33 |
34 | def load(self, weight_dict):
35 | self.weight = weight_dict[self.weight_name].cuda()
36 | self.bias = weight_dict[self.bias_name].cuda() if self.bias_name is not None else None
37 |
38 | def apply(self, input_tensor):
39 | input_tensor = torch.nn.functional.conv3d(input_tensor, weight=self.weight, bias=self.bias, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups)
40 | return input_tensor
41 |
42 | def to_cpu(self, non_blocking=False):
43 | self.weight = self.weight.to("cpu", non_blocking=non_blocking)
44 | if self.bias is not None:
45 | self.bias = self.bias.to("cpu", non_blocking=non_blocking)
46 |
47 | def to_cuda(self, non_blocking=False):
48 | self.weight = self.weight.cuda(non_blocking=non_blocking)
49 | if self.bias is not None:
50 | self.bias = self.bias.cuda(non_blocking=non_blocking)
51 |
52 | def state_dict(self, destination=None):
53 | if destination is None:
54 | destination = {}
55 | destination[self.weight_name] = self.weight.cpu().detach().clone()
56 | if self.bias is not None:
57 | destination[self.bias_name] = self.bias.cpu().detach().clone()
58 | return destination
59 |
60 |
61 | @CONV3D_WEIGHT_REGISTER("Defaultt-Force-BF16")
62 | class Conv3dWeightForceBF16(Conv3dWeight):
63 | def __init__(self, weight_name, bias_name, stride=1, padding=0, dilation=1, groups=1):
64 | super().__init__(weight_name, bias_name, stride, padding, dilation, groups)
65 |
66 | def load(self, weight_dict):
67 | self.weight = weight_dict[self.weight_name].to(torch.bfloat16).cuda()
68 | self.bias = weight_dict[self.bias_name].to(torch.bfloat16).cuda() if self.bias_name is not None else None
69 |
--------------------------------------------------------------------------------
/lightx2v/common/ops/mm/__init__.py:
--------------------------------------------------------------------------------
1 | from .mm_weight import *
2 | from .mm_weight_calib import *
3 |
--------------------------------------------------------------------------------
/lightx2v/common/ops/mm/mm_weight_calib.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from .mm_weight import MMWeight
3 | from lightx2v.utils.registry_factory import MM_WEIGHT_REGISTER
4 | from lightx2v.utils.quant_utils import IntegerQuantizer, FloatQuantizer
5 |
6 |
7 | @MM_WEIGHT_REGISTER("Calib")
8 | class MMWeightCalib(MMWeight):
9 | def __init__(self, weight_name, bias_name):
10 | super().__init__(weight_name, bias_name)
11 |
12 | def load(self, weight_dict):
13 | assert self.config and self.config.get("mm_type", "Default") != "Default"
14 | self.weight = weight_dict[self.weight_name]
15 | self.get_quantizer()
16 | shape_and_dtype = self.get_quant_shape_and_dtype(self.weight.shape)
17 | self.realq_weight, self.scales, self.zeros = self.w_quantizer.real_quant_tensor(self.weight)
18 | self.realq_weight = self.realq_weight.view(shape_and_dtype["tensor"][0]).contiguous().to(shape_and_dtype["tensor"][1])
19 | self.scales = self.scales.view(shape_and_dtype["scales"][0]).contiguous().to(shape_and_dtype["scales"][1])
20 | if self.zeros is not None:
21 | self.zeros = self.zeros.view(shape_and_dtype["zeros"][0]).contiguous().to(shape_and_dtype["zeros"][1])
22 |
23 | def apply(self, input_tensor):
24 | return super().apply(input_tensor)
25 |
26 | def get_quantizer(self):
27 | if self.config["mm_type"] == "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm":
28 | self.w_setting = {"bit": "e4m3", "symmetric": True, "granularity": "per_channel"}
29 | self.a_setting = {"bit": "e4m3", "symmetric": True, "granularity": "per_channel"}
30 | self.w_quantizer = FloatQuantizer(**self.w_setting)
31 | self.a_quantizer = FloatQuantizer(**self.a_setting)
32 | self.act_dynamic_quant = True
33 | else:
34 | raise NotImplementedError(f"Unsupported mm_type: {self.config['mm_type']}")
35 |
36 | def get_quant_shape_and_dtype(self, shape):
37 | if self.config["mm_type"] == "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm":
38 | return {
39 | "tensor": (shape, torch.float8_e5m2),
40 | "scales": ((shape[0], 1), torch.float32),
41 | "zeros": None,
42 | }
43 | else:
44 | raise NotImplementedError(f"Unsupported mm_type: {self.config['mm_type']}")
45 |
--------------------------------------------------------------------------------
/lightx2v/common/ops/norm/__init__.py:
--------------------------------------------------------------------------------
1 | from .rms_norm_weight import *
2 | from .layer_norm_weight import *
3 |
--------------------------------------------------------------------------------
/lightx2v/common/ops/norm/layer_norm_weight.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from abc import ABCMeta, abstractmethod
3 | from lightx2v.utils.registry_factory import LN_WEIGHT_REGISTER
4 |
5 |
6 | class LNWeightTemplate(metaclass=ABCMeta):
7 | def __init__(self, weight_name, bias_name, eps=1e-6):
8 | self.weight_name = weight_name
9 | self.bias_name = bias_name
10 | self.eps = eps
11 | self.config = {}
12 |
13 | def load(self, weight_dict):
14 | self.weight = weight_dict[self.weight_name].cuda() if self.weight_name is not None else None
15 | self.bias = weight_dict[self.bias_name].cuda() if self.bias_name is not None else None
16 |
17 | @abstractmethod
18 | def apply(self, input_tensor):
19 | pass
20 |
21 | def set_config(self, config=None):
22 | if config is not None:
23 | self.config = config
24 |
25 | def to_cpu(self, non_blocking=False):
26 | if self.weight is not None:
27 | self.weight = self.weight.to("cpu", non_blocking=non_blocking)
28 | if self.bias is not None:
29 | self.bias = self.bias.to("cpu", non_blocking=non_blocking)
30 |
31 | def to_cuda(self, non_blocking=False):
32 | if self.weight is not None:
33 | self.weight = self.weight.cuda(non_blocking=non_blocking)
34 | if self.bias is not None:
35 | self.bias = self.bias.cuda(non_blocking=non_blocking)
36 |
37 | def state_dict(self, destination=None):
38 | if destination is None:
39 | destination = {}
40 | if self.weight is not None:
41 | destination[self.weight_name] = self.weight.cpu().detach().clone()
42 | if self.bias is not None:
43 | destination[self.bias_name] = self.bias.cpu().detach().clone()
44 | return destination
45 |
46 |
47 | @LN_WEIGHT_REGISTER("Default")
48 | class LNWeight(LNWeightTemplate):
49 | def __init__(self, weight_name, bias_name, eps=1e-6):
50 | super().__init__(weight_name, bias_name, eps)
51 |
52 | def apply(self, input_tensor):
53 | input_tensor = torch.nn.functional.layer_norm(input_tensor, (input_tensor.shape[-1],), self.weight, self.bias, self.eps)
54 | return input_tensor
55 |
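An illustrative load/apply sketch with made-up weight names (not part of the file above):

    import torch
    from lightx2v.utils.registry_factory import LN_WEIGHT_REGISTER

    hidden = 128
    weight_dict = {
        "norm_final.weight": torch.ones(hidden),
        "norm_final.bias": torch.zeros(hidden),
    }

    ln = LN_WEIGHT_REGISTER["Default"]("norm_final.weight", "norm_final.bias", eps=1e-6)
    ln.load(weight_dict)  # moves the tensors to CUDA

    x = torch.randn(4, hidden, device="cuda")
    y = ln.apply(x)  # layer_norm over the last dimension
    print(y.shape)  # torch.Size([4, 128])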
--------------------------------------------------------------------------------
/lightx2v/common/ops/norm/rms_norm_weight.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from abc import ABCMeta, abstractmethod
3 | from lightx2v.utils.registry_factory import RMS_WEIGHT_REGISTER
4 | import sgl_kernel
5 |
6 |
7 | class RMSWeightTemplate(metaclass=ABCMeta):
8 | def __init__(self, weight_name, eps=1e-6):
9 | self.weight_name = weight_name
10 | self.eps = eps
11 | self.config = {}
12 |
13 | def load(self, weight_dict):
14 | self.weight = weight_dict[self.weight_name].cuda()
15 |
16 | @abstractmethod
17 | def apply(self, input_tensor):
18 | pass
19 |
20 | def set_config(self, config=None):
21 | if config is not None:
22 | self.config = config
23 |
24 | def to_cpu(self, non_blocking=False):
25 | self.weight = self.weight.to("cpu", non_blocking=non_blocking)
26 |
27 | def to_cuda(self, non_blocking=False):
28 | self.weight = self.weight.cuda(non_blocking=non_blocking)
29 |
30 |
31 | @RMS_WEIGHT_REGISTER("Default")
32 | class RMSWeight(RMSWeightTemplate):
33 | def __init__(self, weight_name, eps=1e-6):
34 | super().__init__(weight_name, eps)
35 |
36 | def apply(self, input_tensor):
37 | input_tensor = input_tensor * torch.rsqrt(input_tensor.pow(2).mean(-1, keepdim=True) + self.eps)
38 | input_tensor = input_tensor * self.weight
39 | return input_tensor
40 |
41 | def state_dict(self, destination=None):
42 | if destination is None:
43 | destination = {}
44 | destination[self.weight_name] = self.weight.cpu().detach().clone()
45 | return destination
46 |
47 |
48 | @RMS_WEIGHT_REGISTER("FP32")
49 | class RMSWeightFP32(RMSWeight):
50 | def __init__(self, weight_name, eps=1e-6):
51 | super().__init__(weight_name, eps)
52 |
53 | def apply(self, input_tensor):
54 | input_tensor = input_tensor.float()
55 | input_tensor = input_tensor * torch.rsqrt(input_tensor.pow(2).mean(-1, keepdim=True) + self.eps)
56 | input_tensor = input_tensor.to(torch.bfloat16)
57 | input_tensor = input_tensor * self.weight
58 | return input_tensor
59 |
60 |
61 | @RMS_WEIGHT_REGISTER("sgl-kernel")
62 | class RMSWeightSgl(RMSWeight):
63 | def __init__(self, weight_name, eps=1e-6):
64 | super().__init__(weight_name, eps)
65 |
66 | def apply(self, input_tensor):
67 | input_tensor = input_tensor.contiguous()
68 | orig_shape = input_tensor.shape
69 | input_tensor = input_tensor.view(-1, orig_shape[-1])
70 | input_tensor = sgl_kernel.rmsnorm(input_tensor, self.weight, self.eps).view(orig_shape)
71 | return input_tensor
72 |
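An illustrative sketch of the default RMS-norm path with a made-up weight name (not part of the file above); the "sgl-kernel" variant follows the same load/apply flow but dispatches to sgl_kernel.rmsnorm:

    import torch
    from lightx2v.utils.registry_factory import RMS_WEIGHT_REGISTER

    hidden = 128
    weight_dict = {"blocks.0.norm_q.weight": torch.ones(hidden, dtype=torch.bfloat16)}

    rms = RMS_WEIGHT_REGISTER["Default"]("blocks.0.norm_q.weight", eps=1e-6)
    rms.load(weight_dict)  # moves the weight to CUDA

    x = torch.randn(4, hidden, dtype=torch.bfloat16, device="cuda")
    y = rms.apply(x)  # x * rsqrt(mean(x^2) + eps) * weight
    print(y.shape)  # torch.Size([4, 128])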
--------------------------------------------------------------------------------
/lightx2v/common/ops/tensor/__init__.py:
--------------------------------------------------------------------------------
1 | from .tensor import DefaultTensor
2 |
--------------------------------------------------------------------------------
/lightx2v/common/ops/tensor/tensor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from lightx2v.utils.registry_factory import TENSOR_REGISTER
3 |
4 |
5 | @TENSOR_REGISTER("Default")
6 | class DefaultTensor:
7 | def __init__(self, tensor_name):
8 | self.tensor_name = tensor_name
9 |
10 | def load(self, weight_dict):
11 | self.tensor = weight_dict[self.tensor_name]
12 | self.pinned_tensor = torch.empty(self.tensor.shape, pin_memory=True, dtype=self.tensor.dtype)
13 |
14 | def to_cpu(self, non_blocking=False):
15 | # self.tensor = self.tensor.to("cpu", non_blocking=non_blocking)
16 | self.tensor = self.pinned_tensor.copy_(self.tensor, non_blocking=non_blocking).cpu()
17 |
18 | def to_cuda(self, non_blocking=False):
19 | self.tensor = self.tensor.cuda(non_blocking=non_blocking)
20 |
21 | def state_dict(self, destination=None):
22 | if destination is None:
23 | destination = {}
24 | destination[self.tensor_name] = self.tensor.cpu().detach().clone()
25 | return destination
26 |
--------------------------------------------------------------------------------
/lightx2v/infer.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import argparse
3 | import torch
4 | import torch.distributed as dist
5 | import json
6 |
7 | from lightx2v.utils.envs import *
8 | from lightx2v.utils.utils import seed_all
9 | from lightx2v.utils.profiler import ProfilingContext
10 | from lightx2v.utils.set_config import set_config
11 | from lightx2v.utils.registry_factory import RUNNER_REGISTER
12 |
13 | from lightx2v.models.runners.hunyuan.hunyuan_runner import HunyuanRunner
14 | from lightx2v.models.runners.wan.wan_runner import WanRunner
15 | from lightx2v.models.runners.wan.wan_causvid_runner import WanCausVidRunner
16 | from lightx2v.models.runners.wan.wan_skyreels_v2_df_runner import WanSkyreelsV2DFRunner
17 | from lightx2v.models.runners.graph_runner import GraphRunner
18 | from lightx2v.models.runners.cogvideox.cogvidex_runner import CogvideoxRunner
19 |
20 | from lightx2v.common.ops import *
21 | from loguru import logger
22 |
23 |
24 | def init_runner(config):
25 | seed_all(config.seed)
26 |
27 | if config.parallel_attn_type:
28 | dist.init_process_group(backend="nccl")
29 |
30 | if CHECK_ENABLE_GRAPH_MODE():
31 | default_runner = RUNNER_REGISTER[config.model_cls](config)
32 | runner = GraphRunner(default_runner)
33 | else:
34 | runner = RUNNER_REGISTER[config.model_cls](config)
35 | runner.init_modules()
36 | return runner
37 |
38 |
39 | async def main():
40 | parser = argparse.ArgumentParser()
41 | parser.add_argument("--model_cls", type=str, required=True, choices=["wan2.1", "hunyuan", "wan2.1_causvid", "wan2.1_skyreels_v2_df", "cogvideox"], default="hunyuan")
42 | parser.add_argument("--task", type=str, choices=["t2v", "i2v"], default="t2v")
43 | parser.add_argument("--model_path", type=str, required=True)
44 | parser.add_argument("--config_json", type=str, required=True)
45 | parser.add_argument("--prompt_enhancer", type=str, default=None)
46 |
47 | parser.add_argument("--prompt", type=str, required=True)
48 | parser.add_argument("--negative_prompt", type=str, default="")
49 | parser.add_argument("--image_path", type=str, default="", help="The path to input image file or path for image-to-video (i2v) task")
50 | parser.add_argument("--save_video_path", type=str, default="./output_lightx2v.mp4", help="The path to save video path/file")
51 | args = parser.parse_args()
52 | logger.info(f"args: {args}")
53 |
54 | with ProfilingContext("Total Cost"):
55 | config = set_config(args)
56 | config["mode"] = "infer"
57 | logger.info(f"config:\n{json.dumps(config, ensure_ascii=False, indent=4)}")
58 | runner = init_runner(config)
59 |
60 | await runner.run_pipeline()
61 |
62 |
63 | if __name__ == "__main__":
64 | asyncio.run(main())
65 |
--------------------------------------------------------------------------------
/lightx2v/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/input_encoders/hf/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/clip/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/input_encoders/hf/clip/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/clip/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from transformers import CLIPTextModel, AutoTokenizer
3 | from loguru import logger
4 |
5 |
6 | class TextEncoderHFClipModel:
7 | def __init__(self, model_path, device):
8 | self.device = device
9 | self.model_path = model_path
10 | self.init()
11 | self.load()
12 |
13 | def init(self):
14 | self.max_length = 77
15 |
16 | def load(self):
17 | self.model = CLIPTextModel.from_pretrained(self.model_path).to(torch.float16).to(self.device)
18 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, padding_side="right")
19 |
20 | def to_cpu(self):
21 | self.model = self.model.to("cpu")
22 |
23 | def to_cuda(self):
24 | self.model = self.model.to("cuda")
25 |
26 | @torch.no_grad()
27 | def infer(self, text, config):
28 | if config.cpu_offload:
29 | self.to_cuda()
30 | tokens = self.tokenizer(
31 | text,
32 | return_length=False,
33 | return_overflowing_tokens=False,
34 | return_attention_mask=True,
35 | truncation=True,
36 | max_length=self.max_length,
37 | padding="max_length",
38 | return_tensors="pt",
39 | ).to("cuda")
40 |
41 | outputs = self.model(
42 | input_ids=tokens["input_ids"],
43 | attention_mask=tokens["attention_mask"],
44 | output_hidden_states=False,
45 | )
46 |
47 | last_hidden_state = outputs["pooler_output"]
48 | if config.cpu_offload:
49 | self.to_cpu()
50 | return last_hidden_state, tokens["attention_mask"]
51 |
52 |
53 | if __name__ == "__main__":
54 | model_path = ""
55 | model = TextEncoderHFClipModel(model_path, torch.device("cuda"))
56 | text = "A cat walks on the grass, realistic style."
57 | outputs = model.infer(text)
58 | logger.info(outputs)
59 |
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/llama/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/input_encoders/hf/llama/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/llama/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from transformers import AutoModel, AutoTokenizer
3 | from loguru import logger
4 |
5 |
6 | class TextEncoderHFLlamaModel:
7 | def __init__(self, model_path, device):
8 | self.device = device
9 | self.model_path = model_path
10 | self.init()
11 | self.load()
12 |
13 | def init(self):
14 | self.max_length = 351
15 | self.hidden_state_skip_layer = 2
16 | self.crop_start = 95
17 | self.prompt_template = (
18 | "<|start_header_id|>system<|end_header_id|>\n\nDescribe the video by detailing the following aspects: "
19 | "1. The main content and theme of the video."
20 | "2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects."
21 | "3. Actions, events, behaviors temporal relationships, physical movement changes of the objects."
22 | "4. background environment, light, style and atmosphere."
23 | "5. camera angles, movements, and transitions used in the video:<|eot_id|>"
24 | "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|>"
25 | )
26 |
27 | def load(self):
28 | self.model = AutoModel.from_pretrained(self.model_path, low_cpu_mem_usage=True).to(torch.float16).to(self.device)
29 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, padding_side="right")
30 |
31 | def to_cpu(self):
32 | self.model = self.model.to("cpu")
33 |
34 | def to_cuda(self):
35 | self.model = self.model.to("cuda")
36 |
37 | @torch.no_grad()
38 | def infer(self, text, config):
39 | if config.cpu_offload:
40 | self.to_cuda()
41 | text = self.prompt_template.format(text)
42 | tokens = self.tokenizer(
43 | text,
44 | return_length=False,
45 | return_overflowing_tokens=False,
46 | return_attention_mask=True,
47 | truncation=True,
48 | max_length=self.max_length,
49 | padding="max_length",
50 | return_tensors="pt",
51 | ).to("cuda")
52 |
53 | outputs = self.model(
54 | input_ids=tokens["input_ids"],
55 | attention_mask=tokens["attention_mask"],
56 | output_hidden_states=True,
57 | )
58 |
59 | last_hidden_state = outputs.hidden_states[-(self.hidden_state_skip_layer + 1)][:, self.crop_start :]
60 | attention_mask = tokens["attention_mask"][:, self.crop_start :]
61 | if config.cpu_offload:
62 | self.to_cpu()
63 | return last_hidden_state, attention_mask
64 |
65 |
66 | if __name__ == "__main__":
67 | model_path = ""
68 | model = TextEncoderHFLlamaModel(model_path, torch.device("cuda"))
69 | text = "A cat walks on the grass, realistic style."
70 | outputs = model.infer(text)
71 | logger.info(outputs)
72 |
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/llava/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/input_encoders/hf/llava/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/t5/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/input_encoders/hf/t5/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/t5/tokenizer.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2 | import html
3 | import string
4 |
5 | import ftfy
6 | import regex as re
7 | from transformers import AutoTokenizer
8 |
9 | __all__ = ["HuggingfaceTokenizer"]
10 |
11 |
12 | def basic_clean(text):
13 | text = ftfy.fix_text(text)
14 | text = html.unescape(html.unescape(text))
15 | return text.strip()
16 |
17 |
18 | def whitespace_clean(text):
19 | text = re.sub(r"\s+", " ", text)
20 | text = text.strip()
21 | return text
22 |
23 |
24 | def canonicalize(text, keep_punctuation_exact_string=None):
25 | text = text.replace("_", " ")
26 | if keep_punctuation_exact_string:
27 | text = keep_punctuation_exact_string.join(part.translate(str.maketrans("", "", string.punctuation)) for part in text.split(keep_punctuation_exact_string))
28 | else:
29 | text = text.translate(str.maketrans("", "", string.punctuation))
30 | text = text.lower()
31 | text = re.sub(r"\s+", " ", text)
32 | return text.strip()
33 |
34 |
35 | class HuggingfaceTokenizer:
36 | def __init__(self, name, seq_len=None, clean=None, **kwargs):
37 | assert clean in (None, "whitespace", "lower", "canonicalize")
38 | self.name = name
39 | self.seq_len = seq_len
40 | self.clean = clean
41 |
42 | # init tokenizer
43 | self.tokenizer = AutoTokenizer.from_pretrained(name, **kwargs)
44 | self.vocab_size = self.tokenizer.vocab_size
45 |
46 | def __call__(self, sequence, **kwargs):
47 | return_mask = kwargs.pop("return_mask", False)
48 |
49 | # arguments
50 | _kwargs = {"return_tensors": "pt"}
51 | if self.seq_len is not None:
52 | _kwargs.update(
53 | {
54 | "padding": "max_length",
55 | "truncation": True,
56 | "max_length": self.seq_len,
57 | }
58 | )
59 | _kwargs.update(**kwargs)
60 |
61 | # tokenization
62 | if isinstance(sequence, str):
63 | sequence = [sequence]
64 | if self.clean:
65 | sequence = [self._clean(u) for u in sequence]
66 | ids = self.tokenizer(sequence, **_kwargs)
67 |
68 | # output
69 | if return_mask:
70 | return ids.input_ids, ids.attention_mask
71 | else:
72 | return ids.input_ids
73 |
74 | def _clean(self, text):
75 | if self.clean == "whitespace":
76 | text = whitespace_clean(basic_clean(text))
77 | elif self.clean == "lower":
78 | text = whitespace_clean(basic_clean(text)).lower()
79 | elif self.clean == "canonicalize":
80 | text = canonicalize(basic_clean(text))
81 | return text
82 |
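An illustrative usage sketch (not part of the file above); the tokenizer name is an assumption, substitute the local tokenizer path your config points at:

    from lightx2v.models.input_encoders.hf.t5.tokenizer import HuggingfaceTokenizer

    tokenizer = HuggingfaceTokenizer(name="google/umt5-xxl", seq_len=512, clean="whitespace")

    ids, mask = tokenizer("A cat walks on the grass, realistic style.", return_mask=True)
    print(ids.shape, mask.shape)  # both [1, 512] after padding/truncation to seq_len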
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/t5_v1_1_xxl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/input_encoders/hf/t5_v1_1_xxl/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/t5_v1_1_xxl/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import os
3 | from transformers import T5EncoderModel, T5Tokenizer
4 |
5 |
6 | class T5EncoderModel_v1_1_xxl:
7 | def __init__(self, config):
8 | self.config = config
9 | self.model = T5EncoderModel.from_pretrained(os.path.join(config.model_path, "text_encoder")).to(torch.bfloat16).to(torch.device("cuda"))
10 | self.tokenizer = T5Tokenizer.from_pretrained(os.path.join(config.model_path, "tokenizer"), padding_side="right")
11 |
12 | def to_cpu(self):
13 | self.model = self.model.to("cpu")
14 |
15 | def to_cuda(self):
16 | self.model = self.model.to("cuda")
17 |
18 | def infer(self, texts, config):
19 | text_inputs = self.tokenizer(
20 | texts,
21 | padding="max_length",
22 | max_length=config.text_len,
23 | truncation=True,
24 | add_special_tokens=True,
25 | return_tensors="pt",
26 | ).to("cuda")
27 |
28 | text_input_ids = text_inputs.input_ids
29 | untruncated_ids = self.tokenizer(texts, padding="longest", return_tensors="pt").input_ids
30 |
31 | if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids):
32 | removed_text = self.tokenizer.batch_decode(untruncated_ids[:, config.text_len - 1 : -1])
33 |             print(f"The following part of your input was truncated because `max_sequence_length` is set to {config.text_len} tokens: {removed_text}")
34 |
35 | prompt_embeds = self.model(text_input_ids.to(torch.device("cuda")))[0]
36 | return prompt_embeds
37 |
--------------------------------------------------------------------------------
/lightx2v/models/input_encoders/hf/xlm_roberta/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/input_encoders/hf/xlm_roberta/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/networks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/networks/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/networks/cogvideox/infer/post_infer.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class CogvideoxPostInfer:
5 | def __init__(self, config):
6 | self.config = config
7 |
8 | def ada_layernorm(self, weight_mm, weight_ln, x, temb):
9 | temb = torch.nn.functional.silu(temb)
10 | temb = weight_mm.apply(temb)
11 | shift, scale = temb.chunk(2, dim=1)
12 | x = weight_ln.apply(x) * (1 + scale) + shift
13 | return x
14 |
15 | def infer(self, weight, hidden_states, encoder_hidden_states, temb, infer_shapes):
16 | hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=0)
17 | hidden_states = weight.norm_final.apply(hidden_states)
18 | hidden_states = hidden_states[self.config.text_len :,]
19 | hidden_states = self.ada_layernorm(weight.norm_out_linear, weight.norm_out_norm, hidden_states, temb=temb)
20 | hidden_states = weight.proj_out.apply(hidden_states)
21 | p = self.config["patch_size"]
22 | p_t = self.config["patch_size_t"]
23 | num_frames, _, height, width = infer_shapes
24 | output = hidden_states.reshape((num_frames + p_t - 1) // p_t, height // p, width // p, -1, p_t, p, p)
25 | output = output.permute(0, 4, 3, 1, 5, 2, 6).flatten(5, 6).flatten(3, 4).flatten(0, 1)
26 | return output
27 |
--------------------------------------------------------------------------------
/lightx2v/models/networks/cogvideox/weights/post_weights.py:
--------------------------------------------------------------------------------
1 | from lightx2v.utils.registry_factory import MM_WEIGHT_REGISTER, LN_WEIGHT_REGISTER
2 | from lightx2v.common.ops.mm.mm_weight import MMWeightTemplate
3 | from lightx2v.common.ops.norm.layer_norm_weight import LNWeightTemplate
4 |
5 |
6 | class CogvideoxPostWeights:
7 | def __init__(self, config, mm_type="Default"):
8 | self.config = config
9 | self.mm_type = mm_type
10 |
11 | def load_weights(self, weight_dict):
12 | self.norm_out_linear = MM_WEIGHT_REGISTER[self.mm_type]("norm_out.linear.weight", "norm_out.linear.bias")
13 | self.proj_out = MM_WEIGHT_REGISTER[self.mm_type]("proj_out.weight", "proj_out.bias")
14 | self.norm_final = LN_WEIGHT_REGISTER[self.mm_type]("norm_final.weight", "norm_final.bias")
15 | self.norm_out_norm = LN_WEIGHT_REGISTER[self.mm_type]("norm_out.norm.weight", "norm_out.norm.bias", eps=1e-5)
16 |
17 | self.weight_list = [self.norm_out_linear, self.proj_out, self.norm_final, self.norm_out_norm]
18 |
19 | for mm_weight in self.weight_list:
20 | if isinstance(mm_weight, (MMWeightTemplate, LNWeightTemplate)):
21 | mm_weight.load(weight_dict)
22 |
23 | def to_cpu(self):
24 | for mm_weight in self.weight_list:
25 | if isinstance(mm_weight, (MMWeightTemplate, LNWeightTemplate)):
26 | mm_weight.to_cpu()
27 |
28 | def to_cuda(self):
29 | for mm_weight in self.weight_list:
30 | if isinstance(mm_weight, (MMWeightTemplate, LNWeightTemplate)):
31 | mm_weight.to_cuda()
32 |
--------------------------------------------------------------------------------
/lightx2v/models/networks/cogvideox/weights/pre_weights.py:
--------------------------------------------------------------------------------
1 | from lightx2v.utils.registry_factory import MM_WEIGHT_REGISTER
2 | from lightx2v.common.ops.mm.mm_weight import MMWeightTemplate
3 | from lightx2v.common.ops.norm.layer_norm_weight import LNWeightTemplate
4 |
5 |
6 | class CogvideoxPreWeights:
7 | def __init__(self, config):
8 | self.config = config
9 |
10 | def load_weights(self, weight_dict):
11 | self.time_embedding_linear_1 = MM_WEIGHT_REGISTER["Default"]("time_embedding.linear_1.weight", "time_embedding.linear_1.bias")
12 | self.time_embedding_linear_2 = MM_WEIGHT_REGISTER["Default"]("time_embedding.linear_2.weight", "time_embedding.linear_2.bias")
13 | self.patch_embed_proj = MM_WEIGHT_REGISTER["Default"]("patch_embed.proj.weight", "patch_embed.proj.bias")
14 | self.patch_embed_text_proj = MM_WEIGHT_REGISTER["Default"]("patch_embed.text_proj.weight", "patch_embed.text_proj.bias")
15 |
16 | self.weight_list = [self.time_embedding_linear_1, self.time_embedding_linear_2, self.patch_embed_proj, self.patch_embed_text_proj]
17 |
18 | for mm_weight in self.weight_list:
19 | mm_weight.set_config(self.config)
20 | mm_weight.load(weight_dict)
21 |
22 | def to_cpu(self):
23 | for mm_weight in self.weight_list:
24 | if isinstance(mm_weight, (MMWeightTemplate, LNWeightTemplate)):
25 | mm_weight.to_cpu()
26 |
27 | def to_cuda(self):
28 | for mm_weight in self.weight_list:
29 | if isinstance(mm_weight, (MMWeightTemplate, LNWeightTemplate)):
30 | mm_weight.to_cuda()
31 |
--------------------------------------------------------------------------------
/lightx2v/models/networks/hunyuan/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/networks/hunyuan/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/networks/hunyuan/infer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/networks/hunyuan/infer/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/networks/hunyuan/infer/feature_caching/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/networks/hunyuan/infer/feature_caching/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/networks/hunyuan/infer/feature_caching/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | import math
3 | import torch
4 |
5 |
6 | def taylor_cache_init(cache_dic: Dict, current: Dict):
7 | """
8 | Initialize Taylor cache, expanding storage areas for Taylor series derivatives
9 | :param cache_dic: Cache dictionary
10 | :param current: Information of the current step
11 | """
12 | if current["step"] == 0:
13 | cache_dic["cache"][-1][current["stream"]][current["layer"]][current["module"]] = {}
14 |
15 |
16 | def derivative_approximation(cache_dic: Dict, current: Dict, feature: torch.Tensor):
17 | """
18 | Compute derivative approximation
19 | :param cache_dic: Cache dictionary
20 | :param current: Information of the current step
21 | """
22 | difference_distance = current["activated_steps"][-1] - current["activated_steps"][-2]
23 | # difference_distance = current['activated_times'][-1] - current['activated_times'][-2]
24 |
25 | updated_taylor_factors = {}
26 | updated_taylor_factors[0] = feature
27 |
28 | for i in range(cache_dic["max_order"]):
29 | if (cache_dic["cache"][-1][current["stream"]][current["layer"]][current["module"]].get(i, None) is not None) and (current["step"] > cache_dic["first_enhance"] - 2):
30 | updated_taylor_factors[i + 1] = (updated_taylor_factors[i] - cache_dic["cache"][-1][current["stream"]][current["layer"]][current["module"]][i]) / difference_distance
31 | else:
32 | break
33 |
34 | cache_dic["cache"][-1][current["stream"]][current["layer"]][current["module"]] = updated_taylor_factors
35 |
36 |
37 | def taylor_formula(cache_dic: Dict, current: Dict) -> torch.Tensor:
38 | """
39 |     Evaluate the Taylor expansion to approximate the cached feature at the current step
40 | :param cache_dic: Cache dictionary
41 | :param current: Information of the current step
42 | """
43 | x = current["step"] - current["activated_steps"][-1]
44 | # x = current['t'] - current['activated_times'][-1]
45 | output = 0
46 |
47 | for i in range(len(cache_dic["cache"][-1][current["stream"]][current["layer"]][current["module"]])):
48 | output += (1 / math.factorial(i)) * cache_dic["cache"][-1][current["stream"]][current["layer"]][current["module"]][i] * (x**i)
49 |
50 | return output
51 |
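Worked sketch (not part of the file above; the nested dict layout and the stream/layer/module key names are inferred from the indexing in these functions): cache a feature at two activated steps, then extrapolate one step ahead with the first-order term.

    import torch
    from lightx2v.models.networks.hunyuan.infer.feature_caching.utils import taylor_cache_init, derivative_approximation, taylor_formula

    cache_dic = {"cache": {-1: {"double_stream": {0: {"attn": {}}}}}, "max_order": 1, "first_enhance": 0}
    current = {"stream": "double_stream", "layer": 0, "module": "attn", "step": 0, "activated_steps": [-1, 0]}  # leading -1 only pads the two-element lookup

    taylor_cache_init(cache_dic, current)
    derivative_approximation(cache_dic, current, torch.ones(4))          # stores the order-0 term at step 0

    current.update(step=2, activated_steps=[-1, 0, 2])
    derivative_approximation(cache_dic, current, torch.full((4,), 3.0))  # adds the finite-difference order-1 term

    current["step"] = 3
    approx = taylor_formula(cache_dic, current)  # 3 + 1 * (3 - 2) = 4 per element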
--------------------------------------------------------------------------------
/lightx2v/models/networks/hunyuan/infer/post_infer.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class HunyuanPostInfer:
5 | def __init__(self, config):
6 | self.config = config
7 |
8 | def set_scheduler(self, scheduler):
9 | self.scheduler = scheduler
10 |
11 | def infer(self, weights, img, vec):
12 | out = torch.nn.functional.silu(vec)
13 | out = weights.final_layer_adaLN_modulation_1.apply(out)
14 | shift, scale = out.chunk(2, dim=1)
15 | out = torch.nn.functional.layer_norm(img, (img.shape[1],), None, None, 1e-6)
16 | out = out * (1 + scale) + shift
17 | out = weights.final_layer_linear.apply(out.to(torch.float32))
18 | _, _, ot, oh, ow = self.scheduler.latents.shape
19 | patch_size = [1, 2, 2]
20 | tt, th, tw = (
21 | ot // patch_size[0],
22 | oh // patch_size[1],
23 | ow // patch_size[2],
24 | )
25 |
26 | c = 16
27 | pt, ph, pw = patch_size
28 |
29 | out = out.reshape(shape=(1, tt, th, tw, c, pt, ph, pw))
30 | out = torch.einsum("nthwcopq->nctohpwq", out)
31 | out = out.reshape(shape=(1, c, tt * pt, th * ph, tw * pw))
32 |
33 | return out
34 |
--------------------------------------------------------------------------------
/lightx2v/models/networks/hunyuan/infer/utils.py:
--------------------------------------------------------------------------------
1 | import sgl_kernel
2 |
3 |
4 | def rms_norm(x, weight, eps):
5 | x = x.contiguous()
6 | orig_shape = x.shape
7 | x = x.view(-1, orig_shape[-1])
8 | x = sgl_kernel.rmsnorm(x, weight, eps).view(orig_shape)
9 | return x
10 |
--------------------------------------------------------------------------------
/lightx2v/models/networks/hunyuan/infer/utils_bf16.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from typing import Any, List, Tuple, Optional, Union, Dict
3 |
4 |
5 | def rms_norm(x, weight, eps):
6 | x = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps)
7 | x = x * weight
8 | return x
9 |
10 |
11 | def rotate_half(x, shape_0, shape_1):
12 | x_real, x_imag = x.reshape(shape_0, shape_1, -1, 2).unbind(-1)
13 | return torch.stack([-x_imag, x_real], dim=-1).flatten(2)
14 |
15 |
16 | def rotary_emb(x, shape_0, shape_1, cos, sin):
17 | x_out = x * cos + rotate_half(x, shape_0, shape_1) * sin
18 | return x_out
19 |
20 |
21 | def apply_rotary_emb(
22 | xq: torch.Tensor,
23 | xk: torch.Tensor,
24 | freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]],
25 | ) -> Tuple[torch.Tensor, torch.Tensor]:
26 | shape_0, shape_1, shape_2 = xq.shape
27 | cos = freqs_cis[0].view(shape_0, 1, shape_2)
28 | sin = freqs_cis[1].view(shape_0, 1, shape_2)
29 | xq_out = rotary_emb(xq, shape_0, shape_1, cos, sin)
30 | xk_out = rotary_emb(xk, shape_0, shape_1, cos, sin)
31 | return xq_out, xk_out
32 |
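Shape sketch (illustrative sizes, not part of the file above): q and k are laid out as (seq_len, num_heads, head_dim), and freqs_cis is a (cos, sin) pair holding seq_len * head_dim elements each, which apply_rotary_emb views so it broadcasts over the head axis.

    import torch
    from lightx2v.models.networks.hunyuan.infer.utils_bf16 import apply_rotary_emb

    seq_len, num_heads, head_dim = 8, 4, 64
    xq = torch.randn(seq_len, num_heads, head_dim, dtype=torch.bfloat16)
    xk = torch.randn(seq_len, num_heads, head_dim, dtype=torch.bfloat16)
    cos, sin = torch.randn(seq_len, head_dim), torch.randn(seq_len, head_dim)
    q_rot, k_rot = apply_rotary_emb(xq, xk, (cos, sin))  # both come back as (8, 4, 64)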
--------------------------------------------------------------------------------
/lightx2v/models/networks/hunyuan/infer/utils_fp32.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from typing import Any, List, Tuple, Optional, Union, Dict
3 |
4 |
5 | def rms_norm(x, weight, eps):
6 | x = x.float()
7 | x = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps)
8 | x = x.to(torch.bfloat16)
9 | x = x * weight
10 | return x
11 |
12 |
13 | def rotate_half(x, shape_0, shape_1):
14 | x_real, x_imag = x.float().reshape(shape_0, shape_1, -1, 2).unbind(-1)
15 | return torch.stack([-x_imag, x_real], dim=-1).flatten(2)
16 |
17 |
18 | def rotary_emb(x, shape_0, shape_1, cos, sin):
19 | x_out = x * cos + rotate_half(x, shape_0, shape_1) * sin
20 | return x_out.to(torch.bfloat16)
21 |
22 |
23 | def apply_rotary_emb(
24 | xq: torch.Tensor,
25 | xk: torch.Tensor,
26 | freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]],
27 | ) -> Tuple[torch.Tensor, torch.Tensor]:
28 | shape_0, shape_1, shape_2 = xq.shape
29 | cos = freqs_cis[0].view(shape_0, 1, shape_2)
30 | sin = freqs_cis[1].view(shape_0, 1, shape_2)
31 | xq_out = rotary_emb(xq.float(), shape_0, shape_1, cos, sin)
32 | xk_out = rotary_emb(xk.float(), shape_0, shape_1, cos, sin)
33 | return xq_out, xk_out
34 |
--------------------------------------------------------------------------------
/lightx2v/models/networks/hunyuan/weights/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/networks/hunyuan/weights/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/networks/hunyuan/weights/post_weights.py:
--------------------------------------------------------------------------------
1 | from lightx2v.utils.registry_factory import MM_WEIGHT_REGISTER
2 | from lightx2v.common.modules.weight_module import WeightModule
3 |
4 |
5 | class HunyuanPostWeights(WeightModule):
6 | def __init__(self, config):
7 | super().__init__()
8 | self.config = config
9 |
10 | self.add_module("final_layer_linear", MM_WEIGHT_REGISTER["Default-Force-FP32"]("final_layer.linear.weight", "final_layer.linear.bias"))
11 | self.add_module("final_layer_adaLN_modulation_1", MM_WEIGHT_REGISTER["Default"]("final_layer.adaLN_modulation.1.weight", "final_layer.adaLN_modulation.1.bias"))
12 |
--------------------------------------------------------------------------------
/lightx2v/models/networks/wan/causvid_model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import time
4 | import glob
5 | from lightx2v.models.networks.wan.model import WanModel
6 | from lightx2v.models.networks.wan.weights.pre_weights import WanPreWeights
7 | from lightx2v.models.networks.wan.weights.post_weights import WanPostWeights
8 | from lightx2v.models.networks.wan.weights.transformer_weights import (
9 | WanTransformerWeights,
10 | )
11 | from lightx2v.models.networks.wan.infer.pre_infer import WanPreInfer
12 | from lightx2v.models.networks.wan.infer.post_infer import WanPostInfer
13 | from lightx2v.models.networks.wan.infer.causvid.transformer_infer import (
14 | WanTransformerInferCausVid,
15 | )
16 | from lightx2v.models.networks.wan.infer.feature_caching.transformer_infer import WanTransformerInferTeaCaching
17 | from safetensors import safe_open
18 | import lightx2v.attentions.distributed.ulysses.wrap as ulysses_dist_wrap
19 | import lightx2v.attentions.distributed.ring.wrap as ring_dist_wrap
20 |
21 |
22 | class WanCausVidModel(WanModel):
23 | pre_weight_class = WanPreWeights
24 | post_weight_class = WanPostWeights
25 | transformer_weight_class = WanTransformerWeights
26 |
27 | def __init__(self, model_path, config, device):
28 | super().__init__(model_path, config, device)
29 |
30 | def _init_infer_class(self):
31 | self.pre_infer_class = WanPreInfer
32 | self.post_infer_class = WanPostInfer
33 | self.transformer_infer_class = WanTransformerInferCausVid
34 |
35 | def _load_ckpt(self):
36 | use_bfloat16 = self.config.get("use_bfloat16", True)
37 | ckpt_path = os.path.join(self.model_path, "causal_model.pt")
38 | if not os.path.exists(ckpt_path):
39 |             # Checkpoint file does not exist; fall back to the parent class's _load_ckpt
40 | return super()._load_ckpt()
41 |
42 | weight_dict = torch.load(ckpt_path, map_location="cpu", weights_only=True)
43 |
44 | dtype = torch.bfloat16 if use_bfloat16 else None
45 | for key, value in weight_dict.items():
46 | weight_dict[key] = value.to(device=self.device, dtype=dtype)
47 |
48 | return weight_dict
49 |
50 | @torch.no_grad()
51 | def infer(self, inputs, kv_start, kv_end):
52 | if self.config["cpu_offload"]:
53 | self.pre_weight.to_cuda()
54 | self.post_weight.to_cuda()
55 |
56 | embed, grid_sizes, pre_infer_out = self.pre_infer.infer(self.pre_weight, inputs, positive=True, kv_start=kv_start, kv_end=kv_end)
57 |
58 | x = self.transformer_infer.infer(self.transformer_weights, grid_sizes, embed, *pre_infer_out, kv_start, kv_end)
59 | self.scheduler.noise_pred = self.post_infer.infer(self.post_weight, x, embed, grid_sizes)[0]
60 |
61 | if self.config["cpu_offload"]:
62 | self.pre_weight.to_cpu()
63 | self.post_weight.to_cpu()
64 |
--------------------------------------------------------------------------------
/lightx2v/models/networks/wan/infer/causvid/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/networks/wan/infer/causvid/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/networks/wan/infer/feature_caching/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/networks/wan/infer/feature_caching/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/networks/wan/infer/post_infer.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.cuda.amp as amp
4 |
5 |
6 | class WanPostInfer:
7 | def __init__(self, config):
8 | self.out_dim = config["out_dim"]
9 | self.patch_size = (1, 2, 2)
10 |
11 | def set_scheduler(self, scheduler):
12 | self.scheduler = scheduler
13 |
14 | def infer(self, weights, x, e, grid_sizes):
15 | if e.dim() == 2:
16 | modulation = weights.head_modulation.tensor # 1, 2, dim
17 | e = (modulation + e.unsqueeze(1)).chunk(2, dim=1)
18 | elif e.dim() == 3: # For Diffustion forcing
19 | modulation = weights.head_modulation.tensor.unsqueeze(2) # 1, 2, seq, dim
20 | e = (modulation + e.unsqueeze(1)).chunk(2, dim=1)
21 | e = [ei.squeeze(1) for ei in e]
22 |
23 | norm_out = torch.nn.functional.layer_norm(x, (x.shape[1],), None, None, 1e-6).type_as(x)
24 | out = norm_out * (1 + e[1].squeeze(0)) + e[0].squeeze(0)
25 | x = weights.head.apply(out)
26 | x = self.unpatchify(x, grid_sizes)
27 | return [u.float() for u in x]
28 |
29 | def unpatchify(self, x, grid_sizes):
30 | x = x.unsqueeze(0)
31 | c = self.out_dim
32 | out = []
33 | for u, v in zip(x, grid_sizes.tolist()):
34 | u = u[: math.prod(v)].view(*v, *self.patch_size, c)
35 | u = torch.einsum("fhwpqrc->cfphqwr", u)
36 | u = u.reshape(c, *[i * j for i, j in zip(v, self.patch_size)])
37 | out.append(u)
38 | return out
39 |
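Shape sketch (illustrative sizes, not part of the file above): with grid_sizes = (f, h, w) and patch_size = (1, 2, 2), unpatchify folds a (f*h*w, prod(patch_size)*out_dim) token tensor back into an (out_dim, f, 2h, 2w) latent video.

    import torch
    from lightx2v.models.networks.wan.infer.post_infer import WanPostInfer

    post = WanPostInfer({"out_dim": 16})  # minimal config; __init__ only reads "out_dim"
    tokens = torch.randn(21 * 30 * 52, 1 * 2 * 2 * 16)
    video = post.unpatchify(tokens, torch.tensor([[21, 30, 52]]))[0]
    print(video.shape)  # torch.Size([16, 21, 60, 104])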
--------------------------------------------------------------------------------
/lightx2v/models/networks/wan/weights/post_weights.py:
--------------------------------------------------------------------------------
1 | from lightx2v.utils.registry_factory import MM_WEIGHT_REGISTER, TENSOR_REGISTER
2 | from lightx2v.common.modules.weight_module import WeightModule
3 |
4 |
5 | class WanPostWeights(WeightModule):
6 | def __init__(self, config):
7 | super().__init__()
8 | self.config = config
9 | self.add_module("head", MM_WEIGHT_REGISTER["Default"]("head.head.weight", "head.head.bias"))
10 | self.register_parameter("head_modulation", TENSOR_REGISTER["Default"]("head.modulation"))
11 |
--------------------------------------------------------------------------------
/lightx2v/models/networks/wan/weights/pre_weights.py:
--------------------------------------------------------------------------------
1 | from lightx2v.utils.registry_factory import MM_WEIGHT_REGISTER, LN_WEIGHT_REGISTER, CONV3D_WEIGHT_REGISTER
2 | from lightx2v.common.modules.weight_module import WeightModule
3 |
4 |
5 | class WanPreWeights(WeightModule):
6 | def __init__(self, config):
7 | super().__init__()
8 | self.in_dim = config["in_dim"]
9 | self.dim = config["dim"]
10 | self.patch_size = (1, 2, 2)
11 | self.config = config
12 |
13 | self.add_module("patch_embedding", CONV3D_WEIGHT_REGISTER["Defaultt-Force-BF16"]("patch_embedding.weight", "patch_embedding.bias", stride=self.patch_size))
14 | self.add_module("text_embedding_0", MM_WEIGHT_REGISTER["Default"]("text_embedding.0.weight", "text_embedding.0.bias"))
15 | self.add_module("text_embedding_2", MM_WEIGHT_REGISTER["Default"]("text_embedding.2.weight", "text_embedding.2.bias"))
16 | self.add_module("time_embedding_0", MM_WEIGHT_REGISTER["Default"]("time_embedding.0.weight", "time_embedding.0.bias"))
17 | self.add_module("time_embedding_2", MM_WEIGHT_REGISTER["Default"]("time_embedding.2.weight", "time_embedding.2.bias"))
18 | self.add_module("time_projection_1", MM_WEIGHT_REGISTER["Default"]("time_projection.1.weight", "time_projection.1.bias"))
19 |
20 | if config.task == "i2v":
21 | self.add_module("proj_0", LN_WEIGHT_REGISTER["Default"]("img_emb.proj.0.weight", "img_emb.proj.0.bias", eps=1e-5))
22 | self.add_module("proj_1", MM_WEIGHT_REGISTER["Default"]("img_emb.proj.1.weight", "img_emb.proj.1.bias"))
23 | self.add_module("proj_3", MM_WEIGHT_REGISTER["Default"]("img_emb.proj.3.weight", "img_emb.proj.3.bias"))
24 | self.add_module("proj_4", LN_WEIGHT_REGISTER["Default"]("img_emb.proj.4.weight", "img_emb.proj.4.bias", eps=1e-5))
25 |
--------------------------------------------------------------------------------
/lightx2v/models/runners/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/runners/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/runners/cogvideox/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/runners/cogvideox/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/runners/graph_runner.py:
--------------------------------------------------------------------------------
1 | from lightx2v.utils.profiler import ProfilingContext4Debug
2 | from loguru import logger
3 |
4 |
5 | class GraphRunner:
6 | def __init__(self, runner):
7 | self.runner = runner
8 | self.compile()
9 |
10 | def compile(self):
11 | logger.info("start compile...")
12 | with ProfilingContext4Debug("compile"):
13 | self.runner.run_step()
14 | logger.info("end compile...")
15 |
16 | def run_pipeline(self):
17 | return self.runner.run_pipeline()
18 |
--------------------------------------------------------------------------------
/lightx2v/models/runners/hunyuan/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/runners/hunyuan/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/runners/wan/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/runners/wan/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/schedulers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/schedulers/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/schedulers/hunyuan/feature_caching/scheduler.py:
--------------------------------------------------------------------------------
1 | from .utils import cache_init, cal_type
2 | from ..scheduler import HunyuanScheduler
3 | import torch
4 |
5 |
6 | class HunyuanSchedulerTeaCaching(HunyuanScheduler):
7 | def __init__(self, config):
8 | super().__init__(config)
9 | self.cnt = 0
10 | self.num_steps = self.config.infer_steps
11 | self.teacache_thresh = self.config.teacache_thresh
12 | self.accumulated_rel_l1_distance = 0
13 | self.previous_modulated_input = None
14 | self.previous_residual = None
15 | self.coefficients = [7.33226126e02, -4.01131952e02, 6.75869174e01, -3.14987800e00, 9.61237896e-02]
16 |
17 | def clear(self):
18 | if self.previous_residual is not None:
19 | self.previous_residual = self.previous_residual.cpu()
20 | if self.previous_modulated_input is not None:
21 | self.previous_modulated_input = self.previous_modulated_input.cpu()
22 |
23 | self.previous_modulated_input = None
24 | self.previous_residual = None
25 | torch.cuda.empty_cache()
26 |
27 |
28 | class HunyuanSchedulerTaylorCaching(HunyuanScheduler):
29 | def __init__(self, config):
30 | super().__init__(config)
31 | self.cache_dic, self.current = cache_init(self.infer_steps)
32 |
33 | def step_pre(self, step_index):
34 | super().step_pre(step_index)
35 | self.current["step"] = step_index
36 | cal_type(self.cache_dic, self.current)
37 |
--------------------------------------------------------------------------------
/lightx2v/models/schedulers/scheduler.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class BaseScheduler:
5 | def __init__(self, config):
6 | self.config = config
7 | self.step_index = 0
8 | self.latents = None
9 | self.flag_df = False
10 |
11 | def step_pre(self, step_index):
12 | self.step_index = step_index
13 | self.latents = self.latents.to(dtype=torch.bfloat16)
14 |
15 | def clear(self):
16 | pass
17 |
--------------------------------------------------------------------------------
/lightx2v/models/schedulers/wan/causvid/scheduler.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import torch
4 | from typing import List, Optional, Tuple, Union
5 | from lightx2v.models.schedulers.wan.scheduler import WanScheduler
6 |
7 |
8 | class WanCausVidScheduler(WanScheduler):
9 | def __init__(self, config):
10 | super().__init__(config)
11 | self.denoising_step_list = config.denoising_step_list
12 | self.infer_steps = self.config.infer_steps
13 | self.sample_shift = self.config.sample_shift
14 |
15 | def prepare(self, image_encoder_output):
16 | self.generator = torch.Generator(device=self.device)
17 | self.generator.manual_seed(self.config.seed)
18 |
19 | self.prepare_latents(self.config.target_shape, dtype=torch.float32)
20 |
21 | if self.config.task in ["t2v"]:
22 | self.seq_len = math.ceil((self.config.target_shape[2] * self.config.target_shape[3]) / (self.config.patch_size[1] * self.config.patch_size[2]) * self.config.target_shape[1])
23 | elif self.config.task in ["i2v"]:
24 | self.seq_len = self.config.lat_h * self.config.lat_w // (self.config.patch_size[1] * self.config.patch_size[2]) * self.config.target_shape[1]
25 |
26 | alphas = np.linspace(1, 1 / self.num_train_timesteps, self.num_train_timesteps)[::-1].copy()
27 | sigmas = 1.0 - alphas
28 | sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32)
29 |
30 | sigmas = self.shift * sigmas / (1 + (self.shift - 1) * sigmas)
31 |
32 | self.sigmas = sigmas
33 | self.timesteps = sigmas * self.num_train_timesteps
34 |
35 | self.model_outputs = [None] * self.solver_order
36 | self.timestep_list = [None] * self.solver_order
37 | self.last_sample = None
38 |
39 | self.sigmas = self.sigmas.to("cpu")
40 | self.sigma_min = self.sigmas[-1].item()
41 | self.sigma_max = self.sigmas[0].item()
42 |
43 |         if len(self.denoising_step_list) == self.infer_steps:  # use denoising_step_list when it is valid (length matches infer_steps)
44 | self.set_denoising_timesteps(device=self.device)
45 | else:
46 | self.set_timesteps(self.infer_steps, device=self.device, shift=self.sample_shift)
47 |
48 | def set_denoising_timesteps(self, device: Union[str, torch.device] = None):
49 | self.timesteps = torch.tensor(self.denoising_step_list, device=device, dtype=torch.int64)
50 | self.sigmas = torch.cat([self.timesteps / self.num_train_timesteps, torch.tensor([0.0], device=device)])
51 | self.sigmas = self.sigmas.to("cpu")
52 | self.infer_steps = len(self.timesteps)
53 |
54 | self.model_outputs = [
55 | None,
56 | ] * self.solver_order
57 | self.lower_order_nums = 0
58 | self.last_sample = None
59 | self._begin_index = None
60 |
61 | def reset(self):
62 | self.model_outputs = [None] * self.solver_order
63 | self.timestep_list = [None] * self.solver_order
64 | self.last_sample = None
65 | self.noise_pred = None
66 | self.this_order = None
67 | self.lower_order_nums = 0
68 | self.prepare_latents(self.config.target_shape, dtype=torch.float32)
69 |
--------------------------------------------------------------------------------
/lightx2v/models/video_encoders/hf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/video_encoders/hf/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/video_encoders/hf/autoencoder_kl_causal_3d/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/video_encoders/hf/autoencoder_kl_causal_3d/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/video_encoders/hf/autoencoder_kl_causal_3d/model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from .autoencoder_kl_causal_3d import AutoencoderKLCausal3D, DiagonalGaussianDistribution
4 |
5 |
6 | class VideoEncoderKLCausal3DModel:
7 | def __init__(self, model_path, dtype, device, config):
8 | self.model_path = model_path
9 | self.dtype = dtype
10 | self.device = device
11 | self.config = config
12 | self.load()
13 |
14 | def load(self):
15 | if self.config.task == "t2v":
16 | self.vae_path = os.path.join(self.model_path, "hunyuan-video-t2v-720p/vae")
17 | else:
18 | self.vae_path = os.path.join(self.model_path, "hunyuan-video-i2v-720p/vae")
19 | config = AutoencoderKLCausal3D.load_config(self.vae_path)
20 | self.model = AutoencoderKLCausal3D.from_config(config)
21 | ckpt = torch.load(os.path.join(self.vae_path, "pytorch_model.pt"), map_location="cpu", weights_only=True)
22 | self.model.load_state_dict(ckpt)
23 | self.model = self.model.to(dtype=self.dtype, device=self.device)
24 | self.model.requires_grad_(False)
25 | self.model.eval()
26 |
27 | def to_cpu(self):
28 | self.model = self.model.to("cpu")
29 |
30 | def to_cuda(self):
31 | self.model = self.model.to("cuda")
32 |
33 | def decode(self, latents, generator, config):
34 | if config.cpu_offload:
35 | self.to_cuda()
36 | latents = latents / self.model.config.scaling_factor
37 | latents = latents.to(dtype=self.dtype, device=torch.device("cuda"))
38 | self.model.enable_tiling()
39 | image = self.model.decode(latents, return_dict=False, generator=generator)[0]
40 | image = (image / 2 + 0.5).clamp(0, 1)
41 | image = image.cpu().float()
42 | if config.cpu_offload:
43 | self.to_cpu()
44 | return image
45 |
46 | def encode(self, x, config):
47 | h = self.model.encoder(x)
48 | moments = self.model.quant_conv(h)
49 | posterior = DiagonalGaussianDistribution(moments)
50 | return posterior
51 |
52 |
53 | if __name__ == "__main__":
54 | model_path = ""
55 |     vae_model = VideoEncoderKLCausal3DModel(model_path, dtype=torch.float16, device=torch.device("cuda"), config=None)
56 |
--------------------------------------------------------------------------------
/lightx2v/models/video_encoders/hf/cogvideox/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/video_encoders/hf/cogvideox/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/video_encoders/hf/cogvideox/model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 | import torch # type: ignore
4 | from safetensors import safe_open # type: ignore
5 | from diffusers.video_processor import VideoProcessor # type: ignore
6 |
7 | from lightx2v.models.video_encoders.hf.cogvideox.autoencoder_ks_cogvidex import AutoencoderKLCogVideoX
8 |
9 |
10 | class CogvideoxVAE:
11 | def __init__(self, config):
12 | self.config = config
13 | self.load()
14 |
15 | def _load_safetensor_to_dict(self, file_path):
16 | with safe_open(file_path, framework="pt") as f:
17 | tensor_dict = {key: f.get_tensor(key).to(torch.bfloat16).cuda() for key in f.keys()}
18 | return tensor_dict
19 |
20 | def _load_ckpt(self, model_path):
21 | safetensors_pattern = os.path.join(model_path, "*.safetensors")
22 | safetensors_files = glob.glob(safetensors_pattern)
23 |
24 | if not safetensors_files:
25 | raise FileNotFoundError(f"No .safetensors files found in directory: {model_path}")
26 | weight_dict = {}
27 | for file_path in safetensors_files:
28 | file_weights = self._load_safetensor_to_dict(file_path)
29 | weight_dict.update(file_weights)
30 | return weight_dict
31 |
32 | def load(self):
33 | vae_path = os.path.join(self.config.model_path, "vae")
34 | self.vae_config = AutoencoderKLCogVideoX.load_config(vae_path)
35 | self.model = AutoencoderKLCogVideoX.from_config(self.vae_config)
36 | vae_ckpt = self._load_ckpt(vae_path)
37 | self.vae_scale_factor_spatial = 2 ** (len(self.vae_config["block_out_channels"]) - 1) # 8
38 | self.vae_scale_factor_temporal = self.vae_config["temporal_compression_ratio"] # 4
39 | self.vae_scaling_factor_image = self.vae_config["scaling_factor"] # 0.7
40 | self.model.load_state_dict(vae_ckpt)
41 | self.model.to(torch.bfloat16).to(torch.device("cuda"))
42 | self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
43 |
44 | @torch.no_grad()
45 | def decode(self, latents, generator, config):
46 | latents = latents.permute(0, 2, 1, 3, 4)
47 |         latents = 1 / self.vae_scaling_factor_image * latents
48 | frames = self.model.decode(latents).sample
49 | images = self.video_processor.postprocess_video(video=frames, output_type="pil")[0]
50 | return images
51 |
--------------------------------------------------------------------------------
/lightx2v/models/video_encoders/hf/wan/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/video_encoders/hf/wan/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/video_encoders/hf/wan/vae_tiny.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from ..tae import TAEHV
4 | from lightx2v.utils.memory_profiler import peak_memory_decorator
5 |
6 |
7 | class DotDict(dict):
8 | __getattr__ = dict.__getitem__
9 | __setattr__ = dict.__setitem__
10 |
11 |
12 | class WanVAE_tiny(nn.Module):
13 | def __init__(self, vae_pth="taew2_1.pth", dtype=torch.bfloat16, device="cuda"):
14 | super().__init__()
15 | self.dtype = dtype
16 | self.device = torch.device("cuda")
17 | self.taehv = TAEHV(vae_pth).to(self.dtype)
18 | self.temperal_downsample = [True, True, False]
19 | self.config = DotDict(scaling_factor=1.0, latents_mean=torch.zeros(16), z_dim=16, latents_std=torch.ones(16))
20 |
21 | @peak_memory_decorator
22 | @torch.no_grad()
23 | def decode(self, latents, generator=None, return_dict=None, config=None):
24 | latents = latents.unsqueeze(0)
25 | n, c, t, h, w = latents.shape
26 |         # low-memory default; set parallel=True for a faster decode at the cost of more memory
27 | return self.taehv.decode_video(latents.transpose(1, 2).to(self.dtype), parallel=False).transpose(1, 2).mul_(2).sub_(1)
28 |
--------------------------------------------------------------------------------
/lightx2v/models/video_encoders/trt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/models/video_encoders/trt/__init__.py
--------------------------------------------------------------------------------
/lightx2v/models/video_encoders/trt/autoencoder_kl_causal_3d/model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 |
4 | from lightx2v.models.video_encoders.hf.autoencoder_kl_causal_3d.autoencoder_kl_causal_3d import AutoencoderKLCausal3D
5 | from lightx2v.models.video_encoders.trt.autoencoder_kl_causal_3d import trt_vae_infer
6 |
7 |
8 | class VideoEncoderKLCausal3DModel:
9 | def __init__(self, model_path, dtype, device):
10 | self.model_path = model_path
11 | self.dtype = dtype
12 | self.device = device
13 | self.load()
14 |
15 | def load(self):
16 | self.vae_path = os.path.join(self.model_path, "hunyuan-video-t2v-720p/vae")
17 | config = AutoencoderKLCausal3D.load_config(self.vae_path)
18 | self.model = AutoencoderKLCausal3D.from_config(config)
19 | ckpt = torch.load(os.path.join(self.vae_path, "pytorch_model.pt"), map_location="cpu", weights_only=True)
20 | self.model.load_state_dict(ckpt)
21 | self.model = self.model.to(dtype=self.dtype, device=self.device)
22 | self.model.requires_grad_(False)
23 | self.model.eval()
24 | trt_decoder = trt_vae_infer.HyVaeTrtModelInfer(engine_path=os.path.join(self.vae_path, "vae_decoder.engine"))
25 | self.model.decoder = trt_decoder
26 |
27 | def decode(self, latents, generator):
28 | latents = latents / self.model.config.scaling_factor
29 | latents = latents.to(dtype=self.dtype, device=self.device)
30 | self.model.enable_tiling()
31 | image = self.model.decode(latents, return_dict=False, generator=generator)[0]
32 | image = (image / 2 + 0.5).clamp(0, 1)
33 | image = image.cpu().float()
34 | return image
35 |
36 |
37 | if __name__ == "__main__":
38 | model_path = ""
39 | vae_model = VideoEncoderKLCausal3DModel(model_path, dtype=torch.float16, device=torch.device("cuda"))
40 |
--------------------------------------------------------------------------------
/lightx2v/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/lightx2v/utils/__init__.py
--------------------------------------------------------------------------------
/lightx2v/utils/envs.py:
--------------------------------------------------------------------------------
1 | import os
2 | from functools import lru_cache
3 |
4 |
5 | @lru_cache(maxsize=None)
6 | def CHECK_ENABLE_PROFILING_DEBUG():
7 | ENABLE_PROFILING_DEBUG = os.getenv("ENABLE_PROFILING_DEBUG", "false").lower() == "true"
8 | return ENABLE_PROFILING_DEBUG
9 |
10 |
11 | @lru_cache(maxsize=None)
12 | def CHECK_ENABLE_GRAPH_MODE():
13 | ENABLE_GRAPH_MODE = os.getenv("ENABLE_GRAPH_MODE", "false").lower() == "true"
14 | return ENABLE_GRAPH_MODE
15 |
16 |
17 | @lru_cache(maxsize=None)
18 | def GET_RUNNING_FLAG():
19 | RUNNING_FLAG = os.getenv("RUNNING_FLAG", "infer")
20 | return RUNNING_FLAG
21 |
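Usage sketch (not part of the file above): the checks are memoized with lru_cache, so the environment variables must be exported before the first call, which is what the launch scripts under scripts/ do.

    import os

    os.environ["ENABLE_PROFILING_DEBUG"] = "true"

    from lightx2v.utils.envs import CHECK_ENABLE_PROFILING_DEBUG

    assert CHECK_ENABLE_PROFILING_DEBUG() is True  # cached; later changes to the env var are ignored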
--------------------------------------------------------------------------------
/lightx2v/utils/generate_task_id.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | import time
4 | from datetime import datetime
5 |
6 |
7 | def generate_task_id():
8 | """
9 | Generate a random task ID in the format XXXX-XXXX-XXXX-XXXX-XXXX.
10 | Features:
11 | 1. Does not modify the global random state.
12 | 2. Each X is an uppercase letter or digit (0-9).
13 | 3. Combines time factors to ensure high randomness.
14 | """
15 | # Save the current random state (does not affect external randomness)
16 | original_state = random.getstate()
17 |
18 | try:
19 | # Define character set (uppercase letters + digits)
20 | characters = string.ascii_uppercase + string.digits
21 |
22 | # Create an independent random instance
23 | local_random = random.Random(time.perf_counter_ns())
24 |
25 | # Generate 5 groups of 4-character random strings
26 | groups = []
27 | for _ in range(5):
28 | # Mix new time factor for each group
29 | time_mix = int(datetime.now().timestamp())
30 | local_random.seed(time_mix + local_random.getstate()[1][0] + time.perf_counter_ns())
31 |
32 | groups.append("".join(local_random.choices(characters, k=4)))
33 |
34 | return "-".join(groups)
35 |
36 | finally:
37 | # Restore the original random state
38 | random.setstate(original_state)
39 |
40 |
41 | if __name__ == "__main__":
42 | # Set global random seed
43 | random.seed(42)
44 |
45 | # Test that external randomness is not affected
46 | print("External random number 1:", random.random()) # Always the same
47 | print("Task ID 1:", generate_task_id()) # Different each time
48 |     print("External random number 2:", random.random())  # Always the same
49 |     print("Task ID 2:", generate_task_id())  # Different each time
50 |
--------------------------------------------------------------------------------
/lightx2v/utils/memory_profiler.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from loguru import logger
3 |
4 |
5 | def peak_memory_decorator(func):
6 | def wrapper(*args, **kwargs):
7 |         # Check whether we are running in a distributed environment
8 | rank_info = ""
9 | if torch.distributed.is_available() and torch.distributed.is_initialized():
10 | rank = torch.distributed.get_rank()
11 | rank_info = f"Rank {rank} - "
12 |
13 |         # If a GPU is used, reset the peak memory statistics
14 | if torch.cuda.is_available():
15 | torch.cuda.reset_peak_memory_stats()
16 |
17 |         # Run the target function
18 | result = func(*args, **kwargs)
19 |
20 |         # Collect the peak GPU memory
21 | if torch.cuda.is_available():
22 |             peak_memory = torch.cuda.max_memory_allocated() / (1024**3)  # convert to GB
23 | logger.info(f"{rank_info}Function '{func.__qualname__}' Peak Memory: {peak_memory:.2f} GB")
24 | else:
25 | logger.info(f"{rank_info}Function '{func.__qualname__}' executed without GPU.")
26 |
27 | return result
28 |
29 | return wrapper
30 |
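Usage sketch (hypothetical workload, not part of the file above): wrap any GPU-heavy callable to log its peak allocation.

    import torch
    from lightx2v.utils.memory_profiler import peak_memory_decorator

    @peak_memory_decorator
    def run_block(x):  # hypothetical workload
        return x @ x.T

    device = "cuda" if torch.cuda.is_available() else "cpu"
    _ = run_block(torch.randn(1024, 1024, device=device))  # logs peak memory in GB when a GPU is used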
--------------------------------------------------------------------------------
/lightx2v/utils/profiler.py:
--------------------------------------------------------------------------------
1 | import time
2 | import torch
3 | from contextlib import ContextDecorator
4 | from lightx2v.utils.envs import *
5 | from loguru import logger
6 |
7 |
8 | class _ProfilingContext(ContextDecorator):
9 | def __init__(self, name):
10 | self.name = name
11 | self.rank_info = ""
12 | if torch.distributed.is_available() and torch.distributed.is_initialized():
13 | rank = torch.distributed.get_rank()
14 | self.rank_info = f"Rank {rank} - "
15 |
16 | def __enter__(self):
17 | torch.cuda.synchronize()
18 | if torch.cuda.is_available():
19 | torch.cuda.reset_peak_memory_stats()
20 | self.start_time = time.perf_counter()
21 | return self
22 |
23 | def __exit__(self, exc_type, exc_val, exc_tb):
24 | torch.cuda.synchronize()
25 | if torch.cuda.is_available():
26 |             peak_memory = torch.cuda.max_memory_allocated() / (1024**3)  # convert to GB
27 | logger.info(f"{self.rank_info}Function '{self.name}' Peak Memory: {peak_memory:.2f} GB")
28 | else:
29 | logger.info(f"{self.rank_info}Function '{self.name}' executed without GPU.")
30 | elapsed = time.perf_counter() - self.start_time
31 | logger.info(f"[Profile] {self.name} cost {elapsed:.6f} seconds")
32 | return False
33 |
34 |
35 | class _NullContext(ContextDecorator):
36 | # Context manager without decision branch logic overhead
37 | def __init__(self, *args, **kwargs):
38 | pass
39 |
40 | def __enter__(self):
41 | return self
42 |
43 | def __exit__(self, *args):
44 | return False
45 |
46 |
47 | ProfilingContext = _ProfilingContext
48 | ProfilingContext4Debug = _ProfilingContext if CHECK_ENABLE_PROFILING_DEBUG() else _NullContext
49 |
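Usage sketch (not part of the file above): because _ProfilingContext subclasses ContextDecorator, ProfilingContext can be used both as a with-block and as a decorator; note that __enter__/__exit__ call torch.cuda.synchronize(), so this assumes a CUDA device is present.

    from lightx2v.utils.profiler import ProfilingContext, ProfilingContext4Debug

    with ProfilingContext("vae_decode"):
        pass  # the step to be timed goes here

    @ProfilingContext4Debug("dit_forward")
    def forward_once():
        pass  # profiled only when ENABLE_PROFILING_DEBUG=true, otherwise wrapped by the no-op context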
--------------------------------------------------------------------------------
/lightx2v/utils/registry_factory.py:
--------------------------------------------------------------------------------
1 | class Register(dict):
2 | def __init__(self, *args, **kwargs):
3 | super(Register, self).__init__(*args, **kwargs)
4 | self._dict = {}
5 |
6 | def __call__(self, target_or_name):
7 | if callable(target_or_name):
8 | return self.register(target_or_name)
9 | else:
10 | return lambda x: self.register(x, key=target_or_name)
11 |
12 | def register(self, target, key=None):
13 | if not callable(target):
14 | raise Exception(f"Error: {target} must be callable!")
15 |
16 | if key is None:
17 | key = target.__name__
18 |
19 | if key in self._dict:
20 | raise Exception(f"{key} already exists.")
21 |
22 | self[key] = target
23 | return target
24 |
25 | def __setitem__(self, key, value):
26 | self._dict[key] = value
27 |
28 | def __getitem__(self, key):
29 | return self._dict[key]
30 |
31 | def __contains__(self, key):
32 | return key in self._dict
33 |
34 | def __str__(self):
35 | return str(self._dict)
36 |
37 | def keys(self):
38 | return self._dict.keys()
39 |
40 | def values(self):
41 | return self._dict.values()
42 |
43 | def items(self):
44 | return self._dict.items()
45 |
46 |
47 | MM_WEIGHT_REGISTER = Register()
48 | ATTN_WEIGHT_REGISTER = Register()
49 | RMS_WEIGHT_REGISTER = Register()
50 | LN_WEIGHT_REGISTER = Register()
51 | CONV3D_WEIGHT_REGISTER = Register()
52 | CONV2D_WEIGHT_REGISTER = Register()
53 |
54 | TENSOR_REGISTER = Register()
55 |
56 | RUNNER_REGISTER = Register()
57 |
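Usage sketch (not part of the file above; DEMO_REGISTER and the classes are hypothetical): Register supports both a bare decorator, where the key defaults to the class name, and a named decorator, plus dict-style lookup.

    DEMO_REGISTER = Register()

    @DEMO_REGISTER  # registered under "DefaultWeight"
    class DefaultWeight:
        pass

    @DEMO_REGISTER("Default-Force-FP32")  # registered under an explicit key
    class ForceFP32Weight:
        pass

    assert "DefaultWeight" in DEMO_REGISTER and DEMO_REGISTER["Default-Force-FP32"] is ForceFP32Weight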
--------------------------------------------------------------------------------
/lightx2v/utils/set_config.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from easydict import EasyDict
4 | from loguru import logger
5 |
6 |
7 | def get_default_config():
8 | default_config = {
9 | "do_mm_calib": False,
10 | "cpu_offload": False,
11 | "parallel_attn_type": None, # [None, "ulysses", "ring"]
12 | "parallel_vae": False,
13 | "max_area": False,
14 | "vae_stride": (4, 8, 8),
15 | "patch_size": (1, 2, 2),
16 | "feature_caching": "NoCaching", # ["NoCaching", "TaylorSeer", "Tea"]
17 | "teacache_thresh": 0.26,
18 | "use_ret_steps": False,
19 | "use_bfloat16": True,
20 | "lora_path": None,
21 | "strength_model": 1.0,
22 | "mm_config": {},
23 | "use_prompt_enhancer": False,
24 | }
25 | return default_config
26 |
27 |
28 | def set_config(args):
29 | config = get_default_config()
30 | config.update({k: v for k, v in vars(args).items()})
31 | config = EasyDict(config)
32 |
33 | with open(config.config_json, "r") as f:
34 | config_json = json.load(f)
35 | config.update(config_json)
36 |
37 | if os.path.exists(os.path.join(config.model_path, "config.json")):
38 | with open(os.path.join(config.model_path, "config.json"), "r") as f:
39 | model_config = json.load(f)
40 | config.update(model_config)
41 |
42 | if config.task == "i2v":
43 | if config.target_video_length % config.vae_stride[0] != 1:
44 | logger.warning(f"`num_frames - 1` has to be divisible by {config.vae_stride[0]}. Rounding to the nearest number.")
45 | config.target_video_length = config.target_video_length // config.vae_stride[0] * config.vae_stride[0] + 1
46 |
47 | return config
48 |
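Minimal sketch (hypothetical values, written to a temporary file so the snippet is self-contained): set_config layers the CLI args, the --config_json file and an optional <model_path>/config.json on top of the defaults above, in that order.

    import json
    import tempfile
    from types import SimpleNamespace
    from lightx2v.utils.set_config import set_config

    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump({"infer_steps": 40, "target_video_length": 81}, f)

    args = SimpleNamespace(model_path="/nonexistent", config_json=f.name, task="i2v")
    cfg = set_config(args)
    print(cfg.infer_steps, cfg.target_video_length)  # 40 81 (81 % 4 == 1, so no rounding)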
--------------------------------------------------------------------------------
/lightx2v/utils/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | from einops import rearrange
3 | from loguru import logger
4 |
5 | import torch
6 | import torchvision
7 | import numpy as np
8 | import imageio
9 | import random
10 |
11 | import os
12 |
13 |
14 | def seed_all(seed):
15 | random.seed(seed)
16 | os.environ["PYTHONHASHSEED"] = str(seed)
17 | np.random.seed(seed)
18 | torch.manual_seed(seed)
19 | torch.cuda.manual_seed(seed)
20 | torch.cuda.manual_seed_all(seed)
21 | torch.backends.cudnn.benchmark = False
22 | torch.backends.cudnn.deterministic = True
23 |
24 |
25 | def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=1, fps=24):
26 | """save videos by video tensor
27 | copy from https://github.com/guoyww/AnimateDiff/blob/e92bd5671ba62c0d774a32951453e328018b7c5b/animatediff/utils/util.py#L61
28 |
29 | Args:
30 | videos (torch.Tensor): video tensor predicted by the model
31 | path (str): path to save video
32 |         rescale (bool, optional): rescale the video tensor from [-1, 1] to [0, 1]. Defaults to False.
33 |         n_rows (int, optional): Defaults to 1.
34 |         fps (int, optional): video save fps. Defaults to 24.
35 | """
36 | videos = rearrange(videos, "b c t h w -> t b c h w")
37 | outputs = []
38 | for x in videos:
39 | x = torchvision.utils.make_grid(x, nrow=n_rows)
40 | x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)
41 | if rescale:
42 | x = (x + 1.0) / 2.0 # -1,1 -> 0,1
43 | x = torch.clamp(x, 0, 1)
44 | x = (x * 255).numpy().astype(np.uint8)
45 | outputs.append(x)
46 |
47 | os.makedirs(os.path.dirname(path), exist_ok=True)
48 | imageio.mimsave(path, outputs, fps=fps)
49 |
50 |
51 | def cache_video(
52 | tensor,
53 | save_file,
54 | fps=30,
55 | suffix=".mp4",
56 | nrow=8,
57 | normalize=True,
58 | value_range=(-1, 1),
59 | retry=5,
60 | ):
61 | cache_file = save_file
62 |
63 | # save to cache
64 | error = None
65 | for _ in range(retry):
66 | try:
67 | # preprocess
68 | tensor = tensor.clamp(min(value_range), max(value_range))
69 | tensor = torch.stack(
70 | [torchvision.utils.make_grid(u, nrow=nrow, normalize=normalize, value_range=value_range) for u in tensor.unbind(2)],
71 | dim=1,
72 | ).permute(1, 2, 3, 0)
73 | tensor = (tensor * 255).type(torch.uint8).cpu()
74 |
75 | # write video
76 | writer = imageio.get_writer(cache_file, fps=fps, codec="libx264", quality=8)
77 | for frame in tensor.numpy():
78 | writer.append_data(frame)
79 | writer.close()
80 | return cache_file
81 | except Exception as e:
82 | error = e
83 | continue
84 | else:
85 |         logger.info(f"cache_video failed, error: {error}")
86 | return None
87 |
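Usage sketch (illustrative tensor and path, not part of the file above): cache_video expects a (batch, channels, frames, height, width) tensor whose values lie in value_range.

    import torch
    from lightx2v.utils.utils import cache_video

    video = torch.rand(1, 3, 16, 64, 64) * 2 - 1  # fake clip in [-1, 1]
    out_path = cache_video(video, "save_results/demo.mp4", fps=16)  # returns the path on success, None on failure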
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.ruff]
2 | exclude = [".git", ".mypy_cache", ".ruff_cache", ".venv", "dist"]
3 | target-version = "py311"
4 | line-length = 200
5 | indent-width = 4
6 | lint.ignore = ["F"]
7 |
8 |
9 | [tool.ruff.format]
10 | line-ending = "lf"
11 | quote-style = "double"
12 | indent-style = "space"
13 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | packaging
2 | ninja
3 | vllm
4 | torch
5 | torchvision
6 | diffusers
7 | transformers
8 | tokenizers
9 | accelerate
10 | safetensors
11 | opencv-python
12 | numpy
13 | imageio
14 | imageio-ffmpeg
15 | einops
16 | loguru
17 | sgl-kernel
18 | qtorch
19 | ftfy
20 | easydict
21 |
--------------------------------------------------------------------------------
/save_results/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/lightx2v/607f968afb34fea159767b8c72d7e94137e76c96/save_results/.gitkeep
--------------------------------------------------------------------------------
/scripts/check_status.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from loguru import logger
3 |
4 |
5 | response = requests.get("http://localhost:8000/v1/local/video/generate/service_status")
6 | logger.info(response.json())
7 |
8 |
9 | response = requests.get("http://localhost:8000/v1/local/video/generate/get_all_tasks")
10 | logger.info(response.json())
11 |
12 |
13 | response = requests.post("http://localhost:8000/v1/local/video/generate/task_status", json={"task_id": "test_task_001"})
14 | logger.info(response.json())
15 |
--------------------------------------------------------------------------------
/scripts/deploy/start_dit_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set these paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 |     echo "Warning: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}. Change it in this script or set the environment variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.common.apis.dit \
32 | --model_cls wan2.1 \
33 | --task i2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/deploy/wan_i2v.json \
36 | --port 9000
37 |
--------------------------------------------------------------------------------
/scripts/deploy/start_image_encoder_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set these paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 |     echo "Warning: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}. Change it in this script or set the environment variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.common.apis.image_encoder \
32 | --model_cls wan2.1 \
33 | --task i2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/deploy/wan_i2v.json \
36 | --port 9003
37 |
--------------------------------------------------------------------------------
/scripts/deploy/start_prompt_enhancer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set these paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 |     echo "Warning: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}. Change it in this script or set the environment variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.common.apis.prompt_enhancer \
32 | --model_path $model_path \
33 | --port 9001
34 |
--------------------------------------------------------------------------------
/scripts/deploy/start_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set these paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 |     echo "Warning: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}. Change it in this script or set the environment variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.api_server \
32 | --model_cls wan2.1 \
33 | --task i2v \
34 | --split \
35 | --model_path $model_path \
36 | --config_json ${lightx2v_path}/configs/deploy/wan_i2v.json \
37 | --port 8000
38 |
--------------------------------------------------------------------------------
/scripts/deploy/start_text_encoder_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set these paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.common.apis.text_encoder \
32 | --model_cls wan2.1 \
33 | --task i2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/deploy/wan_i2v.json \
36 | --port 9002
37 |
--------------------------------------------------------------------------------
/scripts/deploy/start_vae_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.common.apis.vae \
32 | --model_cls wan2.1 \
33 | --task i2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/deploy/wan_i2v.json \
36 | --port 9004
37 |
--------------------------------------------------------------------------------
/scripts/post.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from loguru import logger
3 | import random
4 | import string
5 | import time
6 | from datetime import datetime
7 |
8 |
9 | # same as lightx2v/utils/generate_task_id.py
10 | # from lightx2v.utils.generate_task_id import generate_task_id
11 | def generate_task_id():
12 | """
13 | Generate a random task ID in the format XXXX-XXXX-XXXX-XXXX-XXXX.
14 | Features:
15 | 1. Does not modify the global random state.
16 | 2. Each X is an uppercase letter or digit (0-9).
17 | 3. Combines time factors to ensure high randomness.
18 | For example: N1PQ-PRM5-N1BN-Z3S1-BGBJ
19 | """
20 | # Save the current random state (does not affect external randomness)
21 | original_state = random.getstate()
22 |
23 | try:
24 | # Define character set (uppercase letters + digits)
25 | characters = string.ascii_uppercase + string.digits
26 |
27 | # Create an independent random instance
28 | local_random = random.Random(time.perf_counter_ns())
29 |
30 | # Generate 5 groups of 4-character random strings
31 | groups = []
32 | for _ in range(5):
33 | # Mix new time factor for each group
34 | time_mix = int(datetime.now().timestamp())
35 | local_random.seed(time_mix + local_random.getstate()[1][0] + time.perf_counter_ns())
36 |
37 | groups.append("".join(local_random.choices(characters, k=4)))
38 |
39 | return "-".join(groups)
40 |
41 | finally:
42 | # Restore the original random state
43 | random.setstate(original_state)
44 |
45 |
46 | if __name__ == "__main__":
47 | url = "http://localhost:8000/v1/local/video/generate"
48 |
49 | message = {
50 | "task_id": generate_task_id(), # task_id also can be string you like, such as "test_task_001"
51 | "task_id_must_unique": True, # If True, the task_id must be unique, otherwise, it will raise an error. Default is False.
52 | "prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
53 | "negative_prompt": "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
54 | "image_path": "",
55 | "save_video_path": "./output_lightx2v_wan_t2v_t02.mp4", # It is best to set it to an absolute path.
56 | }
57 |
58 | logger.info(f"message: {message}")
59 |
60 | response = requests.post(url, json=message)
61 |
62 | logger.info(f"response: {response.json()}")
63 |
--------------------------------------------------------------------------------
/scripts/post_enhancer.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from loguru import logger
3 |
4 |
5 | url = "http://localhost:8000/v1/local/video/generate"
6 |
7 | message = {
8 | "task_id": "test_task_001",
9 | "prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
10 | "negative_prompt": "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
11 | "image_path": "",
12 | "save_video_path": "./output_lightx2v_wan_t2v_enhanced.mp4", # It is best to set it to an absolute path.
13 | "use_prompt_enhancer": True,
14 | }
15 |
16 | logger.info(f"message: {message}")
17 |
18 | response = requests.post(url, json=message)
19 |
20 | logger.info(f"response: {response.json()}")
21 |
--------------------------------------------------------------------------------
/scripts/post_i2v.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from loguru import logger
3 | import random
4 | import string
5 | import time
6 | from datetime import datetime
7 |
8 |
9 | # same as lightx2v/utils/generate_task_id.py
10 | # from lightx2v.utils.generate_task_id import generate_task_id
11 | def generate_task_id():
12 | """
13 | Generate a random task ID in the format XXXX-XXXX-XXXX-XXXX-XXXX.
14 | Features:
15 | 1. Does not modify the global random state.
16 | 2. Each X is an uppercase letter or digit (0-9).
17 | 3. Combines time factors to ensure high randomness.
18 | For example: N1PQ-PRM5-N1BN-Z3S1-BGBJ
19 | """
20 | # Save the current random state (does not affect external randomness)
21 | original_state = random.getstate()
22 |
23 | try:
24 | # Define character set (uppercase letters + digits)
25 | characters = string.ascii_uppercase + string.digits
26 |
27 | # Create an independent random instance
28 | local_random = random.Random(time.perf_counter_ns())
29 |
30 | # Generate 5 groups of 4-character random strings
31 | groups = []
32 | for _ in range(5):
33 | # Mix new time factor for each group
34 | time_mix = int(datetime.now().timestamp())
35 | local_random.seed(time_mix + local_random.getstate()[1][0] + time.perf_counter_ns())
36 |
37 | groups.append("".join(local_random.choices(characters, k=4)))
38 |
39 | return "-".join(groups)
40 |
41 | finally:
42 | # Restore the original random state
43 | random.setstate(original_state)
44 |
45 |
46 | if __name__ == "__main__":
47 | url = "http://localhost:8000/v1/local/video/generate"
48 |
49 | message = {
50 | "task_id": generate_task_id(), # task_id also can be string you like, such as "test_task_001"
51 | "task_id_must_unique": True, # If True, the task_id must be unique, otherwise, it will raise an error. Default is False.
52 | "prompt": "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside.",
53 | "negative_prompt": "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
54 | "image_path": "./assets/inputs/imgs/img_0.jpg",
55 | "save_video_path": "./output_lightx2v_wan_i2v_t02.mp4", # It is best to set it to an absolute path.
56 | }
57 |
58 | logger.info(f"message: {message}")
59 |
60 | response = requests.post(url, json=message)
61 |
62 | logger.info(f"response: {response.json()}")
63 |
--------------------------------------------------------------------------------
/scripts/run_cogvideox_t2v.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | export PYTHONPATH=/mtc/wushuo/VideoGen/diffusers:$PYTHONPATH
32 |
33 | python -m lightx2v.infer \
34 | --model_cls cogvideox \
35 | --task t2v \
36 | --model_path $model_path \
37 | --config_json ${lightx2v_path}/configs/cogvideox_t2v.json \
38 | --prompt "A little girl smile." \
39 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_cogvideox_t2v.mp4
40 |
--------------------------------------------------------------------------------
/scripts/run_hunyuan_i2v.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.infer \
32 | --model_cls hunyuan \
33 | --task i2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/hunyuan_i2v.json \
36 | --prompt "An Asian man with short hair in black tactical uniform and white clothes waves a firework stick." \
37 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_1.jpg \
38 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_i2v.mp4
39 |
--------------------------------------------------------------------------------
/scripts/run_hunyuan_i2v_save_quant.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 |
32 | # =========================
33 | # save quantized weights
34 | # =========================
35 |
36 | export RUNNING_FLAG=save_naive_quant
37 |
38 | python -m lightx2v.infer \
39 | --model_cls hunyuan \
40 | --task i2v \
41 | --model_path $model_path \
42 | --config_json ${lightx2v_path}/configs/hunyuan_i2v_save_quant.json \
43 | --prompt "An Asian man with short hair in black tactical uniform and white clothes waves a firework stick." \
44 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_1.jpg \
45 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_i2v.mp4
46 |
47 | sleep 2
48 |
49 | # =========================
50 | # load quantized weights and run inference
51 | # =========================
52 |
53 | export RUNNING_FLAG=infer
54 |
55 | python -m lightx2v.infer \
56 | --model_cls hunyuan \
57 | --task i2v \
58 | --model_path $model_path \
59 | --config_json ${lightx2v_path}/configs/hunyuan_i2v_save_quant.json \
60 | --prompt "An Asian man with short hair in black tactical uniform and white clothes waves a firework stick." \
61 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_1.jpg \
62 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_i2v.mp4
63 |
--------------------------------------------------------------------------------
/scripts/run_hunyuan_i2v_taylorseer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.infer \
32 | --model_cls hunyuan \
33 | --task i2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/caching/hunyuan_i2v_TaylorSeer.json \
36 | --prompt "An Asian man with short hair in black tactical uniform and white clothes waves a firework stick." \
37 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_1.jpg \
38 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_i2v_taylor.mp4
39 |
--------------------------------------------------------------------------------
/scripts/run_hunyuan_i2v_tea.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.infer \
32 | --model_cls hunyuan \
33 | --task i2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/caching/hunyuan_i2v_Tea.json \
36 | --prompt "An Asian man with short hair in black tactical uniform and white clothes waves a firework stick." \
37 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_1.jpg \
38 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_i2v_tea.mp4
39 |
--------------------------------------------------------------------------------
/scripts/run_hunyuan_t2v.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.infer \
32 | --model_cls hunyuan \
33 | --task t2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/hunyuan_t2v.json \
36 | --prompt "A cat walks on the grass, realistic style." \
37 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_t2v.mp4
38 |
--------------------------------------------------------------------------------
/scripts/run_hunyuan_t2v_dist.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0,1,2,3
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 |
30 | torchrun --nproc_per_node=4 ${lightx2v_path}/lightx2v/infer.py \
31 | --model_cls hunyuan \
32 | --task t2v \
33 | --model_path $model_path \
34 | --config_json ${lightx2v_path}/configs/dist/hunyuan_t2v_dist_ulysses.json \
35 | --prompt "A cat walks on the grass, realistic style." \
36 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hunyuan_t2v_dist_ulysses.mp4
37 |
38 | torchrun --nproc_per_node=4 ${lightx2v_path}/lightx2v/infer.py \
39 | --model_cls hunyuan \
40 | --task t2v \
41 | --model_path $model_path \
42 | --config_json ${lightx2v_path}/configs/dist/hunyuan_t2v_dist_ring.json \
43 | --prompt "A cat walks on the grass, realistic style." \
44 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hunyuan_t2v_dist_ring.mp4
45 |
--------------------------------------------------------------------------------
/scripts/run_hunyuan_t2v_save_quant.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 |
32 | # =========================
33 | # save quantized weights
34 | # =========================
35 |
36 | export RUNNING_FLAG=save_naive_quant
37 |
38 | python -m lightx2v.infer \
39 | --model_cls hunyuan \
40 | --task t2v \
41 | --model_path $model_path \
42 | --config_json ${lightx2v_path}/configs/hunyuan_t2v_save_quant.json \
43 | --prompt "A cat walks on the grass, realistic style." \
44 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_t2v.mp4
45 |
46 | sleep 2
47 |
48 | # =========================
49 | # load quantized weights and run inference
50 | # =========================
51 |
52 | export RUNNING_FLAG=infer
53 |
54 | python -m lightx2v.infer \
55 | --model_cls hunyuan \
56 | --task t2v \
57 | --model_path $model_path \
58 | --config_json ${lightx2v_path}/configs/hunyuan_t2v_save_quant.json \
59 | --prompt "A cat walks on the grass, realistic style." \
60 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_t2v.mp4
61 |
--------------------------------------------------------------------------------
/scripts/run_hunyuan_t2v_taylorseer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 |
30 | python -m lightx2v.infer \
31 | --model_cls hunyuan \
32 | --task t2v \
33 | --model_path $model_path \
34 | --config_json ${lightx2v_path}/configs/caching/hunyuan_t2v_TaylorSeer.json \
35 | --prompt "A cat walks on the grass, realistic style." \
36 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_t2v_taylor.mp4
37 |
--------------------------------------------------------------------------------
/scripts/run_hunyuan_t2v_tea.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 |
30 | python -m lightx2v.infer \
31 | --model_cls hunyuan \
32 | --task t2v \
33 | --model_path $model_path \
34 | --config_json ${lightx2v_path}/configs/caching/hunyuan_t2v_Tea.json \
35 | --prompt "A cat walks on the grass, realistic style." \
36 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_t2v_tea.mp4
37 |
--------------------------------------------------------------------------------
/scripts/run_wan_i2v.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.infer \
32 | --model_cls wan2.1 \
33 | --task i2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/wan_i2v.json \
36 | --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
37 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
38 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
39 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4
40 |
--------------------------------------------------------------------------------
/scripts/run_wan_i2v_advanced_ptq.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | # =========================
32 | # load quantized weights and run inference
33 | # =========================
34 |
35 | export RUNNING_FLAG=infer
36 |
37 | python -m lightx2v.infer \
38 | --model_cls wan2.1 \
39 | --task i2v \
40 | --model_path $model_path \
41 | --config_json ${lightx2v_path}/configs/advanced_ptq/wan_i2v.json \
42 | --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
43 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
44 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
45 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4
46 |
--------------------------------------------------------------------------------
/scripts/run_wan_i2v_causvid.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path="/mnt/Text2Video/wangshankun/lightx2v/"
5 | model_path="/mnt/Text2Video/wangshankun/HF_Cache/Wan2.1-I2V-14B-CausVid/"
6 | # check section
7 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
8 | cuda_devices=0
9 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
10 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
11 | fi
12 |
13 | if [ -z "${lightx2v_path}" ]; then
14 | echo "Error: lightx2v_path is not set. Please set this variable first."
15 | exit 1
16 | fi
17 |
18 | if [ -z "${model_path}" ]; then
19 | echo "Error: model_path is not set. Please set this variable first."
20 | exit 1
21 | fi
22 |
23 | export TOKENIZERS_PARALLELISM=false
24 |
25 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
26 |
27 | export ENABLE_PROFILING_DEBUG=true
28 | export ENABLE_GRAPH_MODE=false
29 |
30 | python -m lightx2v.infer \
31 | --model_cls wan2.1_causvid \
32 | --task i2v \
33 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/wan_i2v_causvid.json \
36 | --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
37 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
38 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v_causvid.mp4
39 |
--------------------------------------------------------------------------------
/scripts/run_wan_i2v_dist.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=1,2,3,4
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | torchrun --nproc_per_node=4 -m lightx2v.infer \
32 | --model_cls wan2.1 \
33 | --task i2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/wan_i2v_dist.json \
36 | --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
37 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
38 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
39 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4
40 |
--------------------------------------------------------------------------------
/scripts/run_wan_i2v_save_quant.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 |
32 | # =========================
33 | # save quantized weights
34 | # =========================
35 |
36 | export RUNNING_FLAG=save_naive_quant
37 |
38 | python -m lightx2v.infer \
39 | --model_cls wan2.1 \
40 | --task i2v \
41 | --model_path $model_path \
42 | --config_json ${lightx2v_path}/configs/wan_i2v_save_quant.json \
43 | --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
44 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
45 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
46 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4
47 |
48 | sleep 2
49 |
50 | # =========================
51 | # load quantized weights and run inference
52 | # =========================
53 |
54 | export RUNNING_FLAG=infer
55 |
56 | python -m lightx2v.infer \
57 | --model_cls wan2.1 \
58 | --task i2v \
59 | --model_path $model_path \
60 | --config_json ${lightx2v_path}/configs/wan_i2v_save_quant.json \
61 | --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
62 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
63 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
64 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4
65 |
--------------------------------------------------------------------------------
/scripts/run_wan_i2v_tea.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.infer \
32 | --model_cls wan2.1 \
33 | --task i2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/caching/wan_i2v_Tea.json \
36 | --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
37 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
38 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
39 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v_tea.mp4
40 |
--------------------------------------------------------------------------------
/scripts/run_wan_i2v_with_lora.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 | lora_path=
7 |
8 | # check section
9 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
10 | cuda_devices=0
11 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
12 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
13 | fi
14 |
15 | if [ -z "${model_path}" ]; then
16 | echo "Error: model_path is not set. Please set this variable first."
17 | exit 1
18 | fi
19 |
20 | if [ -z "${lora_path}" ]; then
21 | echo "Error: lora_path is not set. Please set this variable first."
22 | exit 1
23 | fi
24 |
25 | export TOKENIZERS_PARALLELISM=false
26 |
27 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
28 |
29 | export ENABLE_PROFILING_DEBUG=true
30 |
31 | python -m lightx2v.infer \
32 | --model_cls wan2.1 \
33 | --task i2v \
34 | --model_path $model_path \
35 | --prompt "画面中的物体轻轻向上跃起,变成了外貌相似的毛绒玩具。毛绒玩具有着一双眼睛,它的颜色和之前的一样。然后,它开始跳跃起来。背景保持一致,气氛显得格外俏皮。" \
36 | --infer_steps 40 \
37 | --target_video_length 81 \
38 | --target_width 832 \
39 | --target_height 480 \
40 | --attention_type flash_attn3 \
41 | --seed 42 \
42 | --negative_prompt "画面过曝,模糊,文字,字幕" \
43 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4 \
44 | --sample_guide_scale 5 \
45 | --sample_shift 5 \
46 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
47 | --lora_path ${lora_path} \
48 | --feature_caching Tea \
49 | --mm_config '{"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm", "weight_auto_quant": true}' \
50 | # --mm_config '{"mm_type": "Default", "weight_auto_quant": true}' \
51 | # --use_ret_steps \
52 |
--------------------------------------------------------------------------------
/scripts/run_wan_skyreels_v2_df.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # set paths first
3 | lightx2v_path="/mnt/Text2Video/wangshankun/tmp_code/lightx2v/"
4 | model_path="/mnt/Text2Video/wangshankun/HF_Cache/hub/models--Skywork--SkyReels-V2-DF-14B-540P/snapshots/7ff972ba7b6a33d2f6e6c976dd3cf2d36984eee4/"
5 |
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | #I2V
32 | python -m lightx2v.infer \
33 | --model_cls wan2.1_skyreels_v2_df \
34 | --task t2v \
35 | --model_path $model_path \
36 | --config_json ${lightx2v_path}/configs/wan_skyreels_v2_df.json \
37 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
38 | --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
39 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
40 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_skyreels_v2_df.mp4
41 |
42 | #T2V
43 | #python -m lightx2v.infer \
44 | #--model_cls wan2.1_df \
45 | #--task t2v \
46 | #--model_path $model_path \
47 | #--config_json ${lightx2v_path}/configs/wan_skyreels_v2_df.json \
48 | #--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
49 | #--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
50 | #--save_video_path ${lightx2v_path}/save_results/output_lightx2v_skyreels_v2_df.mp4
51 |
--------------------------------------------------------------------------------
/scripts/run_wan_skyreels_v2_i2v.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # set paths first
3 | lightx2v_path=
4 | model_path=
5 |
6 | # check section
7 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
8 | cuda_devices=0
9 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
10 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
11 | fi
12 |
13 | if [ -z "${lightx2v_path}" ]; then
14 | echo "Error: lightx2v_path is not set. Please set this variable first."
15 | exit 1
16 | fi
17 |
18 | if [ -z "${model_path}" ]; then
19 | echo "Error: model_path is not set. Please set this variable first."
20 | exit 1
21 | fi
22 |
23 | export TOKENIZERS_PARALLELISM=false
24 |
25 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
26 |
27 | export ENABLE_PROFILING_DEBUG=true
28 | export ENABLE_GRAPH_MODE=false
29 |
30 | python -m lightx2v.infer \
31 | --model_cls wan2.1 \
32 | --task i2v \
33 | --model_path $model_path \
34 | --config_json ${lightx2v_path}/configs/wan_skyreels_v2_i2v.json \
35 | --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
36 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
37 | --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
38 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_skyreels_v2_i2v.mp4
39 |
--------------------------------------------------------------------------------
/scripts/run_wan_skyreels_v2_t2v.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.infer \
32 | --model_cls wan2.1 \
33 | --task t2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/wan_skyreels_v2_t2v.json \
36 | --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
37 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
38 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_skyreels_v2_t2v.mp4
39 |
--------------------------------------------------------------------------------
/scripts/run_wan_t2v.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.infer \
32 | --model_cls wan2.1 \
33 | --task t2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/wan_t2v.json \
36 | --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
37 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
38 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v.mp4
39 |
--------------------------------------------------------------------------------
/scripts/run_wan_t2v_causvid.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.infer \
32 | --model_cls wan2.1_causvid \
33 | --task t2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/wan_t2v_causvid.json \
36 | --prompt "Two anthropomorphic cats fight intensely on a spotlighted stage; the left cat wearing blue boxing gear with matching gloves, the right cat in bright red boxing attire and gloves." \
37 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
38 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_causvid.mp4
39 |
--------------------------------------------------------------------------------
/scripts/run_wan_t2v_causvid_save_quant.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 |
32 | # =========================
33 | # save quantized weights
34 | # =========================
35 |
36 | export RUNNING_FLAG=save_naive_quant
37 |
38 | python -m lightx2v.infer \
39 | --model_cls wan2.1_causvid \
40 | --task t2v \
41 | --model_path $model_path \
42 | --config_json ${lightx2v_path}/configs/wan_t2v_causvid_save_quant.json \
43 | --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
44 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
45 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_causvid.mp4
46 |
47 | sleep 2
48 |
49 | # =========================
50 | # load quantized weights and run inference
51 | # =========================
52 |
53 | export RUNNING_FLAG=infer
54 |
55 | python -m lightx2v.infer \
56 | --model_cls wan2.1_causvid \
57 | --task t2v \
58 | --model_path $model_path \
59 | --config_json ${lightx2v_path}/configs/wan_t2v_causvid_save_quant.json \
60 | --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
61 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
62 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_causvid.mp4
63 |
--------------------------------------------------------------------------------
/scripts/run_wan_t2v_dist.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=1,2,3,4
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | torchrun --nproc_per_node=4 -m lightx2v.infer \
32 | --model_cls wan2.1 \
33 | --task t2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/wan_t2v_dist.json \
36 | --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
37 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
38 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v.mp4
39 |
--------------------------------------------------------------------------------
/scripts/run_wan_t2v_enhancer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 | prompt_enhancer_path=
7 |
8 | # check section
9 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
10 | cuda_devices=0,1
11 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
12 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
13 | fi
14 |
15 | if [ -z "${lightx2v_path}" ]; then
16 | echo "Error: lightx2v_path is not set. Please set this variable first."
17 | exit 1
18 | fi
19 |
20 | if [ -z "${model_path}" ]; then
21 | echo "Error: model_path is not set. Please set this variable first."
22 | exit 1
23 | fi
24 |
25 | export TOKENIZERS_PARALLELISM=false
26 |
27 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
28 |
29 | export ENABLE_PROFILING_DEBUG=true
30 | export ENABLE_GRAPH_MODE=false
31 |
32 | python -m lightx2v.infer \
33 | --model_cls wan2.1 \
34 | --task t2v \
35 | --model_path $model_path \
36 | --config_json ${lightx2v_path}/configs/wan_t2v.json \
37 | --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
38 | --prompt_enhancer ${prompt_enhancer_path} \
39 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
40 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v.mp4
41 |
--------------------------------------------------------------------------------
/scripts/run_wan_t2v_save_quant.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 |
32 | # =========================
33 | # save quantization weight
34 | # =========================
35 |
36 | export RUNNING_FLAG=save_naive_quant
37 |
38 | python -m lightx2v.infer \
39 | --model_cls wan2.1 \
40 | --task t2v \
41 | --model_path $model_path \
42 | --config_json ${lightx2v_path}/configs/wan_t2v_save_quant.json \
43 | --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
44 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
45 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v.mp4
46 |
47 | sleep 2
48 |
49 | # =========================
50 | # load quantization weight and inference
51 | # =========================
52 |
53 | export RUNNING_FLAG=infer
54 |
55 | python -m lightx2v.infer \
56 | --model_cls wan2.1 \
57 | --task t2v \
58 | --model_path $model_path \
59 | --config_json ${lightx2v_path}/configs/wan_t2v_save_quant.json \
60 | --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
61 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
62 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v.mp4
63 |
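Note: this script makes two passes controlled by RUNNING_FLAG: save_naive_quant writes the quantized weights, then infer loads them for generation. Once the weights exist, presumably only the second pass is needed on later runs; a sketch:

    export RUNNING_FLAG=infer
    python -m lightx2v.infer \
        --model_cls wan2.1 --task t2v --model_path $model_path \
        --config_json ${lightx2v_path}/configs/wan_t2v_save_quant.json \
        --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
        --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v.mp4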
--------------------------------------------------------------------------------
/scripts/run_wan_t2v_sparge.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | # NOTE: test with the 1.3B model
5 | lightx2v_path=
6 | model_path=
7 |
8 | # check section
9 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
10 | cuda_devices=0
11 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
12 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
13 | fi
14 |
15 | if [ -z "${lightx2v_path}" ]; then
16 | echo "Error: lightx2v_path is not set. Please set this variable first."
17 | exit 1
18 | fi
19 |
20 | if [ -z "${model_path}" ]; then
21 | echo "Error: model_path is not set. Please set this variable first."
22 | exit 1
23 | fi
24 |
25 | export TOKENIZERS_PARALLELISM=false
26 |
27 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
28 |
29 | export ENABLE_PROFILING_DEBUG=true
30 | export ENABLE_GRAPH_MODE=false
31 |
32 | python -m lightx2v.infer \
33 | --model_cls wan2.1 \
34 | --task t2v \
35 | --model_path $model_path \
36 | --config_json ${lightx2v_path}/configs/wan_t2v_sparge.json \
37 | --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
38 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
39 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v.mp4
40 |
--------------------------------------------------------------------------------
/scripts/run_wan_t2v_tea.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.infer \
32 | --model_cls wan2.1 \
33 | --task t2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/caching/wan_t2v_Tea.json \
36 | --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
37 | --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
38 | --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_tea.mp4
39 |
--------------------------------------------------------------------------------
/scripts/start_multi_servers.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Default values
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0,1,2,3,4,5,6,7
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${num_gpus}" ]; then
15 | num_gpus=8
16 | fi
17 |
18 | # Check required parameters
19 | if [ -z "$lightx2v_path" ]; then
20 | echo "Error: lightx2v_path not set"
21 | exit 1
22 | fi
23 |
24 | if [ -z "$model_path" ]; then
25 | echo "Error: model_path not set"
26 | exit 1
27 | fi
28 |
29 | # Set environment variables
30 | export TOKENIZERS_PARALLELISM=false
31 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
32 | export ENABLE_PROFILING_DEBUG=true
33 | export ENABLE_GRAPH_MODE=false
34 |
35 | # Start multiple servers
36 | python -m lightx2v.api_multi_servers \
37 | --num_gpus $num_gpus \
38 | --start_port 8000 \
39 | --model_cls wan2.1 \
40 | --task t2v \
41 | --model_path $model_path \
42 | --config_json ${lightx2v_path}/configs/wan_t2v.json
43 |
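Note: num_gpus defaults to 8 here and, unlike the path variables, is only assigned when the environment does not already define it, so it can be overridden at launch. A hypothetical 4-GPU invocation (after filling in lightx2v_path and model_path inside the script, and assuming the ports from --start_port upward are free):

    CUDA_VISIBLE_DEVICES=0,1,2,3 num_gpus=4 bash scripts/start_multi_servers.sh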
--------------------------------------------------------------------------------
/scripts/start_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 |
7 | # check section
8 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
9 | cuda_devices=0
10 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
11 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
12 | fi
13 |
14 | if [ -z "${lightx2v_path}" ]; then
15 | echo "Error: lightx2v_path is not set. Please set this variable first."
16 | exit 1
17 | fi
18 |
19 | if [ -z "${model_path}" ]; then
20 | echo "Error: model_path is not set. Please set this variable first."
21 | exit 1
22 | fi
23 |
24 | export TOKENIZERS_PARALLELISM=false
25 |
26 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
27 |
28 | export ENABLE_PROFILING_DEBUG=true
29 | export ENABLE_GRAPH_MODE=false
30 |
31 | python -m lightx2v.api_server \
32 | --model_cls wan2.1 \
33 | --task t2v \
34 | --model_path $model_path \
35 | --config_json ${lightx2v_path}/configs/wan_t2v.json \
36 | --port 8000
37 |
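Note: once the server is listening on port 8000, generation requests go to its local video route. The route below is inferred from scripts/stop_running_task.py and the body fields simply mirror the CLI flags; both are assumptions to verify against lightx2v/api_server.py:

    curl -X POST http://localhost:8000/v1/local/video/generate \
        -H "Content-Type: application/json" \
        -d '{"prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.", "save_video_path": "./save_results/output_lightx2v_wan_t2v.mp4"}'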
--------------------------------------------------------------------------------
/scripts/start_server_enhancer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set paths first
4 | lightx2v_path=
5 | model_path=
6 | prompt_enhancer_path=
7 |
8 | # check section
9 | if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
10 | cuda_devices=0,1
11 | echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
12 | export CUDA_VISIBLE_DEVICES=${cuda_devices}
13 | fi
14 |
15 | if [ -z "${lightx2v_path}" ]; then
16 | echo "Error: lightx2v_path is not set. Please set this variable first."
17 | exit 1
18 | fi
19 |
20 | if [ -z "${model_path}" ]; then
21 | echo "Error: model_path is not set. Please set this variable first."
22 | exit 1
23 | fi
24 |
25 | export TOKENIZERS_PARALLELISM=false
26 |
27 | export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
28 |
29 | export ENABLE_PROFILING_DEBUG=true
30 | export ENABLE_GRAPH_MODE=false
31 |
32 | python -m lightx2v.api_server \
33 | --model_cls wan2.1_causvid \
34 | --task t2v \
35 | --model_path $model_path \
36 | --config_json ${lightx2v_path}/configs/wan_t2v_causvid.json \
37 | --prompt_enhancer ${prompt_enhancer_path} \
38 | --port 8000
39 |
--------------------------------------------------------------------------------
/scripts/stop_running_task.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from loguru import logger
3 |
4 |
5 | response = requests.get("http://localhost:8000/v1/local/video/generate/stop_running_task")
6 | logger.info(response.json())
7 |
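Note: this helper assumes the server from start_server.sh on localhost:8000. For a server on another port (for example one launched by start_multi_servers.sh), the same endpoint can be hit directly; a sketch assuming such a server is listening on 8001:

    curl http://localhost:8001/v1/local/video/generate/stop_running_task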
--------------------------------------------------------------------------------