├── .gitattributes ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── build-run.sh ├── frontend ├── .gitignore ├── .npmrc ├── .nvmrc ├── .prettierignore ├── .prettierrc ├── README.md ├── package-lock.json ├── package.json ├── src │ ├── app.css │ ├── app.d.ts │ ├── app.html │ ├── lib │ │ ├── components │ │ │ ├── AspectRatioSelect.svelte │ │ │ ├── Button.svelte │ │ │ ├── Checkbox.svelte │ │ │ ├── ImagePlayer.svelte │ │ │ ├── InputRange.svelte │ │ │ ├── MediaListSwitcher.svelte │ │ │ ├── PipelineOptions.svelte │ │ │ ├── SeedInput.svelte │ │ │ ├── Selectlist.svelte │ │ │ ├── TextArea.svelte │ │ │ ├── VideoInput.svelte │ │ │ └── Warning.svelte │ │ ├── icons │ │ │ ├── aspect.svelte │ │ │ ├── expand.svelte │ │ │ ├── floppy.svelte │ │ │ ├── screen.svelte │ │ │ └── spinner.svelte │ │ ├── index.ts │ │ ├── lcmLive.ts │ │ ├── mediaStream.ts │ │ ├── store.ts │ │ ├── types.ts │ │ └── utils.ts │ ├── piexifjs.d.ts │ └── routes │ │ ├── +layout.svelte │ │ ├── +page.svelte │ │ └── +page.ts ├── static │ └── favicon.png ├── svelte.config.js ├── tailwind.config.js ├── tsconfig.json └── vite.config.ts ├── qr-code.png └── server ├── config.py ├── connection_manager.py ├── device.py ├── main.py ├── pipelines ├── IPcompositionHyperSD15.py ├── IPcompositionHyperSDXL.py ├── __init__.py ├── controlnet.py ├── controlnetDepthFlashSD.py ├── controlnetDepthHyperSD.py ├── controlnetDepthHyperSDXL.py ├── controlnetFlashSD.py ├── controlnetFlashSDXL.py ├── controlnetHyperSD.py ├── controlnetHyperSDXL.py ├── controlnetLoraSD15.py ├── controlnetLoraSD15QRCode.py ├── controlnetLoraSDXL-Lightning.py ├── controlnetLoraSDXL.py ├── controlnetMistoLineHyperSDXL.py ├── controlnetPCMSD15.py ├── controlnetSDTurbo.py ├── controlnetSDXLTurbo.py ├── controlnetSegmindVegaRT.py ├── img2img.py ├── img2imgFlux.py ├── img2imgSDTurbo.py ├── img2imgSDXL-Lightning.py ├── img2imgSDXLTurbo.py ├── img2imgSDXS512.py ├── img2imgSegmindVegaRT.py ├── pix2pix │ ├── __init__.py │ ├── model.py │ └── pix2pix_turbo.py ├── pix2pixTurbo.py ├── txt2img.py ├── txt2imgLora.py ├── txt2imgLoraSDXL.py └── utils │ ├── canny_gpu.py │ └── safety_checker.py ├── requirements.txt └── util.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.7z filter=lfs diff=lfs merge=lfs -text 2 | *.arrow filter=lfs diff=lfs merge=lfs -text 3 | *.bin filter=lfs diff=lfs merge=lfs -text 4 | *.bz2 filter=lfs diff=lfs merge=lfs -text 5 | *.ckpt filter=lfs diff=lfs merge=lfs -text 6 | *.ftz filter=lfs diff=lfs merge=lfs -text 7 | *.gz filter=lfs diff=lfs merge=lfs -text 8 | *.h5 filter=lfs diff=lfs merge=lfs -text 9 | *.joblib filter=lfs diff=lfs merge=lfs -text 10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text 11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text 12 | *.model filter=lfs diff=lfs merge=lfs -text 13 | *.msgpack filter=lfs diff=lfs merge=lfs -text 14 | *.npy filter=lfs diff=lfs merge=lfs -text 15 | *.npz filter=lfs diff=lfs merge=lfs -text 16 | *.onnx filter=lfs diff=lfs merge=lfs -text 17 | *.ot filter=lfs diff=lfs merge=lfs -text 18 | *.parquet filter=lfs diff=lfs merge=lfs -text 19 | *.pb filter=lfs diff=lfs merge=lfs -text 20 | *.pickle filter=lfs diff=lfs merge=lfs -text 21 | *.pkl filter=lfs diff=lfs merge=lfs -text 22 | *.pt filter=lfs diff=lfs merge=lfs -text 23 | *.pth filter=lfs diff=lfs merge=lfs -text 24 | *.rar filter=lfs diff=lfs merge=lfs -text 25 | *.safetensors filter=lfs diff=lfs merge=lfs -text 26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text 27 | *.tar.* filter=lfs diff=lfs merge=lfs -text 
28 | *.tar filter=lfs diff=lfs merge=lfs -text 29 | *.tflite filter=lfs diff=lfs merge=lfs -text 30 | *.tgz filter=lfs diff=lfs merge=lfs -text 31 | *.wasm filter=lfs diff=lfs merge=lfs -text 32 | *.xz filter=lfs diff=lfs merge=lfs -text 33 | *.zip filter=lfs diff=lfs merge=lfs -text 34 | *.zst filter=lfs diff=lfs merge=lfs -text 35 | *tfevents* filter=lfs diff=lfs merge=lfs -text 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | venv/ 3 | public/ 4 | *.pem -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | ENV PYTHONUNBUFFERED=1 6 | ENV NODE_MAJOR=20 7 | 8 | RUN apt-get update && apt-get install --no-install-recommends -y \ 9 | build-essential \ 10 | python3.9 \ 11 | python3-pip \ 12 | python3-dev \ 13 | git \ 14 | ffmpeg \ 15 | google-perftools \ 16 | ca-certificates curl gnupg \ 17 | && apt-get clean && rm -rf /var/lib/apt/lists/* 18 | 19 | WORKDIR /code 20 | 21 | RUN mkdir -p /etc/apt/keyrings 22 | RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg 23 | 24 | RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_MAJOR}.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list > /dev/null 25 | RUN apt-get update && apt-get install nodejs -y 26 | 27 | COPY ./server/requirements.txt /code/requirements.txt 28 | 29 | # Download and install UV 30 | ADD https://astral.sh/uv/install.sh /uv-installer.sh 31 | RUN chmod +x /uv-installer.sh && \ 32 | /uv-installer.sh && \ 33 | rm /uv-installer.sh 34 | 35 | ENV PATH="/root/.local/bin:$PATH" 36 | 37 | # Set up a new user named "user" with user ID 1000 38 | RUN useradd -m -u 1000 user 39 | 40 | # Install dependencies using UV as root 41 | RUN uv pip install --no-cache --system --index-strategy=unsafe-best-match -r /code/requirements.txt 42 | 43 | # Switch to the "user" user 44 | USER user 45 | 46 | # Set home to the user's home directory 47 | ENV HOME=/home/user \ 48 | PATH=/home/user/.local/bin:/root/.local/bin:$PATH \ 49 | PYTHONPATH=$HOME/app \ 50 | PYTHONUNBUFFERED=1 \ 51 | SYSTEM=spaces 52 | 53 | # Set the working directory to the user's home directory 54 | WORKDIR $HOME/app 55 | 56 | # Copy the current directory contents into the container at $HOME/app setting the owner to the user 57 | COPY --chown=user . $HOME/app 58 | 59 | ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4 60 | CMD ["./build-run.sh"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Real-Time Latent Consistency Model Image-to-Image ControlNet 3 | emoji: 🖼️🖼️ 4 | colorFrom: gray 5 | colorTo: indigo 6 | sdk: docker 7 | pinned: false 8 | suggested_hardware: a10g-small 9 | disable_embedding: true 10 | --- 11 | 12 | # Real-Time Latent Consistency Model 13 | 14 | This demo showcases [Latent Consistency Model (LCM)](https://latent-consistency-models.github.io/) using [Diffusers](https://huggingface.co/docs/diffusers/using-diffusers/lcm) with a MJPEG stream server. 
You can read more about LCM + LoRAs with diffusers [here](https://huggingface.co/blog/lcm_lora). 15 | 16 | You need a webcam to run this demo. 🤗 17 | 18 | See a collection of live demos [here](https://huggingface.co/collections/latent-consistency/latent-consistency-model-demos-654e90c52adb0688a0acbe6f). 19 | 20 | ## Running Locally 21 | 22 | You need Python 3.10 and Node > 19, plus either CUDA, a Mac with an M1/M2/M3 chip, or an Intel Arc GPU. 23 | 24 | 25 | ## Install 26 | 27 | ```bash 28 | uv venv --python=3.10 29 | source .venv/bin/activate 30 | uv pip install -r server/requirements.txt 31 | cd frontend && npm install && npm run build && cd .. 32 | python server/main.py --reload --pipeline img2imgSDTurbo 33 | ``` 34 | 35 | Don't forget to build the frontend! 36 | 37 | ```bash 38 | cd frontend && npm install && npm run build && cd .. 39 | ``` 40 | 41 | # Pipelines 42 | You can build your own pipeline by following the examples [here](pipelines); a minimal skeleton is sketched below. 43 | 44 | 
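Each pipeline in [`server/pipelines`](server/pipelines) exposes the same interface: a `Pipeline` class containing an `Info` model, an `InputParams` model whose fields drive the frontend controls, an `__init__(args, device, torch_dtype)` that loads the model, and a `predict(params)` that returns a PIL image. Below is a minimal, hypothetical sketch of that structure; the file name, title, and default values are placeholders, not an actual pipeline that ships with this repo.

```python
# server/pipelines/myPipeline.py -- hypothetical skeleton, not an actual pipeline in this repo
import torch
from PIL import Image
from pydantic import BaseModel, Field

from config import Args
from util import ParamsModel


class Pipeline:
    class Info(BaseModel):
        name: str = "myPipeline"
        title: str = "My Pipeline"
        description: str = "Describe what this pipeline does"
        input_mode: str = "image"  # "image" expects webcam frames, "text" is prompt-only

    class InputParams(ParamsModel):
        # Each field is rendered as a UI control by the frontend (field="textarea", "range", "seed", ...)
        prompt: str = Field(
            "a photo of a cat", title="Prompt", field="textarea", id="prompt"
        )
        seed: int = Field(2159232, min=0, title="Seed", field="seed", hide=True, id="seed")
        steps: int = Field(
            4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
        )

    def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
        # Load your diffusers pipeline here, honour flags such as args.taesd or args.sfast,
        # and move the model to `device` with `torch_dtype`.
        ...

    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
        # Run inference (use params.image when input_mode == "image") and return a PIL image.
        ...
```

Once the file is saved under `server/pipelines/`, it should be selectable with `python server/main.py --pipeline myPipeline`.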
45 | # LCM 46 | ### Image to Image 47 | 48 | ```bash 49 | python server/main.py --reload --pipeline img2img 50 | ``` 51 | 52 | # LCM 53 | ### Text to Image 54 | 55 | ```bash 56 | python server/main.py --reload --pipeline txt2img 57 | ``` 58 | 59 | ### Image to Image ControlNet Canny 60 | 61 | ```bash 62 | python server/main.py --reload --pipeline controlnet 63 | ``` 64 | 65 | 66 | # LCM + LoRA 67 | 68 | LCM-LoRA gives the model the ability to run inference in as little as 4 steps. [Learn more here](https://huggingface.co/blog/lcm_lora) or read the [technical report](https://huggingface.co/papers/2311.05556). 69 | 70 | 71 | ### Image to Image ControlNet Canny LoRA 72 | 73 | ```bash 74 | python server/main.py --reload --pipeline controlnetLoraSD15 75 | ``` 76 | or SDXL; note that SDXL is slower than SD1.5 since inference runs on 1024x1024 images 77 | 78 | ```bash 79 | python server/main.py --reload --pipeline controlnetLoraSDXL 80 | ``` 81 | 82 | ### Text to Image 83 | 84 | ```bash 85 | python server/main.py --reload --pipeline txt2imgLora 86 | ``` 87 | 88 | ```bash 89 | python server/main.py --reload --pipeline txt2imgLoraSDXL 90 | ``` 91 | # Available Pipelines 92 | 93 | #### [LCM](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7) 94 | 95 | `img2img` 96 | `txt2img` 97 | `controlnet` 98 | `txt2imgLora` 99 | `controlnetLoraSD15` 100 | 101 | #### [SDXL](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) 102 | `controlnetLoraSDXL` 103 | `txt2imgLoraSDXL` 104 | 105 | #### [SDXL Turbo](https://huggingface.co/stabilityai/sd-xl-turbo) 106 | 107 | `img2imgSDXLTurbo` 108 | `controlnetSDXLTurbo` 109 | 110 | 111 | #### [SDTurbo](https://huggingface.co/stabilityai/sd-turbo) 112 | `img2imgSDTurbo` 113 | `controlnetSDTurbo` 114 | 115 | #### [Segmind-Vega](https://huggingface.co/segmind/Segmind-Vega) 116 | `controlnetSegmindVegaRT` 117 | `img2imgSegmindVegaRT` 118 | 119 | 120 | ### Setting environment variables 121 | 122 | 123 | * `--host`: Host address (default: 0.0.0.0) 124 | * `--port`: Port number (default: 7860) 125 | * `--reload`: Reload code on change 126 | * `--max-queue-size`: Maximum queue size (optional) 127 | * `--timeout`: Timeout period (optional) 128 | * `--safety-checker`: Enable Safety Checker (optional) 129 | * `--torch-compile`: Use Torch Compile 130 | * `--taesd`: Use Tiny Autoencoder 131 | * `--pipeline`: Pipeline to use (default: "txt2img") 132 | * `--ssl-certfile`: SSL Certificate File (optional) 133 | * `--ssl-keyfile`: SSL Key File (optional) 134 | * `--debug`: Print inference time 135 | * `--compel`: Enable Compel prompt weighting 136 | * `--sfast`: Enable Stable Fast 137 | * `--onediff`: Enable OneDiff 138 | 139 | If you run with `bash build-run.sh`, you can set the `PIPELINE` variable to choose the pipeline you want to run 140 | 141 | ```bash 142 | PIPELINE=txt2imgLoraSDXL bash build-run.sh 143 | ``` 144 | 145 | or set the equivalent environment variables directly 146 | 147 | ```bash 148 | TIMEOUT=120 SAFETY_CHECKER=True MAX_QUEUE_SIZE=4 python server/main.py --reload --pipeline txt2imgLoraSDXL 149 | ``` 150 | 151 | If you're running locally and want to test it on Mobile Safari, the web server needs to be served over HTTPS; generate a self-signed certificate as shown below, or follow the instructions in this [comment](https://github.com/radames/Real-Time-Latent-Consistency-Model/issues/17#issuecomment-1811957196) 152 | 153 | ```bash 154 | openssl req -newkey rsa:4096 -nodes -keyout key.pem -x509 -days 365 -out certificate.pem 155 | python server/main.py --reload --ssl-certfile=certificate.pem --ssl-keyfile=key.pem 156 | ``` 157 | 158 | ## Docker 159 | 160 | You need the NVIDIA Container Toolkit for Docker; the image defaults to the `controlnet` pipeline 161 | 162 | ```bash 163 | docker build -t lcm-live . 164 | docker run -ti -p 7860:7860 --gpus all lcm-live 165 | ``` 166 | 167 | To reuse model data from the host and avoid downloading it again, mount your cache directory; you can change `~/.cache/huggingface` to any other directory, but if you use the huggingface-cli locally you can share the same cache 168 | 169 | ```bash 170 | docker run -ti -p 7860:7860 -e HF_HOME=/data -v ~/.cache/huggingface:/data --gpus all lcm-live 171 | ``` 172 | 173 | 174 | or with environment variables 175 | 176 | ```bash 177 | docker run -ti -e PIPELINE=txt2imgLoraSDXL -p 7860:7860 --gpus all lcm-live 178 | ``` 179 | 180 | 181 | # Demo on Hugging Face 182 | 183 | 184 | * [radames/Real-Time-Latent-Consistency-Model](https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model) 185 | * [radames/Real-Time-SD-Turbo](https://huggingface.co/spaces/radames/Real-Time-SD-Turbo) 186 | * [latent-consistency/Real-Time-LCM-ControlNet-Lora-SD1.5](https://huggingface.co/spaces/latent-consistency/Real-Time-LCM-ControlNet-Lora-SD1.5) 187 | * [latent-consistency/Real-Time-LCM-Text-to-Image-Lora-SD1.5](https://huggingface.co/spaces/latent-consistency/Real-Time-LCM-Text-to-Image-Lora-SD1.5) 188 | * [radames/Real-Time-Latent-Consistency-Model-Text-To-Image](https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model-Text-To-Image) 189 | 190 | 191 | 192 | 193 | https://github.com/radames/Real-Time-Latent-Consistency-Model/assets/102277/c4003ac5-e7ff-44c0-97d3-464bb659de70 194 | -------------------------------------------------------------------------------- /build-run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd frontend 3 | npm install 4 | npm run build 5 | if [ $? 
-eq 0 ]; then 6 | echo -e "\033[1;32m\nfrontend build success \033[0m" 7 | else 8 | echo -e "\033[1;31m\nfrontend build failed\n\033[0m" >&2; exit 1 9 | fi 10 | cd ../ 11 | # check if var PIPELINE is set, otherwise use the default 12 | if [ -z ${PIPELINE+x} ]; then 13 | PIPELINE="controlnet" 14 | fi 15 | if [ -z ${COMPILE+x} ]; then 16 | COMPILE="--sfast" 17 | fi 18 | echo -e "\033[1;32m\npipeline: $PIPELINE \033[0m" 19 | echo -e "\033[1;32m\ncompile: $COMPILE \033[0m" 20 | python3 ./server/main.py --port 7860 --host 0.0.0.0 --pipeline $PIPELINE $COMPILE -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | 3 | # Output 4 | .output 5 | .vercel 6 | .netlify 7 | .wrangler 8 | /.svelte-kit 9 | /build 10 | 11 | # OS 12 | .DS_Store 13 | Thumbs.db 14 | 15 | # Env 16 | .env 17 | .env.* 18 | !.env.example 19 | !.env.test 20 | 21 | # Vite 22 | vite.config.js.timestamp-* 23 | vite.config.ts.timestamp-* 24 | public/ 25 | -------------------------------------------------------------------------------- /frontend/.npmrc: -------------------------------------------------------------------------------- 1 | engine-strict=true 2 | -------------------------------------------------------------------------------- /frontend/.nvmrc: -------------------------------------------------------------------------------- 1 | v20.14.0 2 | -------------------------------------------------------------------------------- /frontend/.prettierignore: -------------------------------------------------------------------------------- 1 | # Package Managers 2 | package-lock.json 3 | pnpm-lock.yaml 4 | yarn.lock 5 | bun.lock 6 | bun.lockb -------------------------------------------------------------------------------- /frontend/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": ["prettier-plugin-svelte", "prettier-plugin-tailwindcss"], 3 | "overrides": [ 4 | { 5 | "files": "*.svelte", 6 | "options": { 7 | "parser": "svelte" 8 | } 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # sv 2 | 3 | Everything you need to build a Svelte project, powered by [`sv`](https://github.com/sveltejs/cli). 4 | 5 | ## Creating a project 6 | 7 | If you're seeing this, you've probably already done this step. Congrats! 8 | 9 | ```bash 10 | # create a new project in the current directory 11 | npx sv create 12 | 13 | # create a new project in my-app 14 | npx sv create my-app 15 | ``` 16 | 17 | ## Developing 18 | 19 | Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server: 20 | 21 | ```bash 22 | npm run dev 23 | 24 | # or start the server and open the app in a new browser tab 25 | npm run dev -- --open 26 | ``` 27 | 28 | ## Building 29 | 30 | To create a production version of your app: 31 | 32 | ```bash 33 | npm run build 34 | ``` 35 | 36 | You can preview the production build with `npm run preview`. 37 | 38 | > To deploy your app, you may need to install an [adapter](https://svelte.dev/docs/kit/adapters) for your target environment. 
39 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "private": true, 4 | "version": "0.0.1", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite dev", 8 | "build": "vite build", 9 | "preview": "vite preview", 10 | "prepare": "svelte-kit sync || echo ''", 11 | "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json", 12 | "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch", 13 | "lint": "eslint . && prettier --check .", 14 | "format": "prettier --write ." 15 | }, 16 | "devDependencies": { 17 | "@eslint/compat": "^1.2.5", 18 | "@eslint/js": "^9.26.0", 19 | "@sveltejs/adapter-static": "^3.0.8", 20 | "@sveltejs/kit": "^2.16.0", 21 | "@sveltejs/vite-plugin-svelte": "^5.0.0", 22 | "@tailwindcss/typography": "^0.5.15", 23 | "@tailwindcss/vite": "^4.1.5", 24 | "eslint": "^9.26.0", 25 | "eslint-config-prettier": "^10.0.1", 26 | "eslint-plugin-svelte": "^3.0.0", 27 | "globals": "^16.0.0", 28 | "prettier": "^3.4.2", 29 | "prettier-plugin-svelte": "^3.3.3", 30 | "prettier-plugin-tailwindcss": "^0.6.11", 31 | "svelte": "^5.0.0", 32 | "svelte-check": "^4.0.0", 33 | "tailwindcss": "^4.1.5", 34 | "typescript": "^5.0.0", 35 | "typescript-eslint": "^8.20.0", 36 | "vite": "^6.2.6" 37 | }, 38 | "dependencies": { 39 | "piexifjs": "^1.0.6", 40 | "rvfc-polyfill": "^1.0.7" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /frontend/src/app.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | @plugin '@tailwindcss/typography'; 3 | -------------------------------------------------------------------------------- /frontend/src/app.d.ts: -------------------------------------------------------------------------------- 1 | // See https://svelte.dev/docs/kit/types#app.d.ts 2 | // for information about these interfaces 3 | declare global { 4 | namespace App { 5 | // interface Error {} 6 | // interface Locals {} 7 | // interface PageData {} 8 | // interface PageState {} 9 | // interface Platform {} 10 | } 11 | } 12 | 13 | export {}; 14 | -------------------------------------------------------------------------------- /frontend/src/app.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | %sveltekit.head% 8 | 9 | 10 |
%sveltekit.body%
11 | 12 | 13 | -------------------------------------------------------------------------------- /frontend/src/lib/components/AspectRatioSelect.svelte: -------------------------------------------------------------------------------- 1 | 17 | 18 |
19 | 28 |
29 | -------------------------------------------------------------------------------- /frontend/src/lib/components/Button.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 8 | 9 | 15 | -------------------------------------------------------------------------------- /frontend/src/lib/components/Checkbox.svelte: -------------------------------------------------------------------------------- 1 | 11 | 12 |
13 | 14 | 20 |
21 | -------------------------------------------------------------------------------- /frontend/src/lib/components/ImagePlayer.svelte: -------------------------------------------------------------------------------- 1 | 42 | 43 |
46 | {#if $lcmLiveStatus === LCMLiveStatus.CONNECTING} 47 | 48 |
49 |
52 |

Connecting...

53 |
54 | {:else if isLCMRunning} 55 | {#if !isExpanded} 56 | 57 | 58 | { 63 | console.error("Image stream error:", e); 64 | // If stream fails to load, set status to error 65 | if ($lcmLiveStatus !== LCMLiveStatus.ERROR) { 66 | lcmLiveStatus.set(LCMLiveStatus.ERROR); 67 | } 68 | }} 69 | /> 70 | {/if} 71 |
72 | 79 | 87 |
88 | {:else if $lcmLiveStatus === LCMLiveStatus.ERROR} 89 | 90 |
93 |

Connection error

94 |
95 | {:else} 96 | 97 | 101 | {/if} 102 |
103 | -------------------------------------------------------------------------------- /frontend/src/lib/components/InputRange.svelte: -------------------------------------------------------------------------------- 1 | 11 | 12 |
13 | 14 | 24 | 30 |
31 | 52 | -------------------------------------------------------------------------------- /frontend/src/lib/components/MediaListSwitcher.svelte: -------------------------------------------------------------------------------- 1 | 14 | 15 |
18 | mediaStreamActions.switchCamera(deviceId, value)} 21 | /> 22 | 31 | {#if $mediaDevices} 32 | 42 | {/if} 43 |
44 | -------------------------------------------------------------------------------- /frontend/src/lib/components/PipelineOptions.svelte: -------------------------------------------------------------------------------- 1 | 20 | 21 |
22 |
23 | {#if featuredOptions} 24 | {#each featuredOptions as params (params.id)} 25 | {#if params.field === FieldType.RANGE} 26 | 28 | {:else if params.field === FieldType.SEED} 29 | 31 | {:else if params.field === FieldType.TEXTAREA} 32 | 34 | {:else if params.field === FieldType.CHECKBOX} 35 | 37 | {:else if params.field === FieldType.SELECT} 38 | 40 | {/if} 41 | {/each} 42 | {/if} 43 |
44 | 45 |
46 | Advanced Options 47 |
53 | {#if advanceOptions} 54 | {#each advanceOptions as params (params.id)} 55 | {#if params.field === FieldType.RANGE} 56 | 60 | {:else if params.field === FieldType.SEED} 61 | 65 | {:else if params.field === FieldType.TEXTAREA} 66 | 68 | {:else if params.field === FieldType.CHECKBOX} 69 | 73 | {:else if params.field === FieldType.SELECT} 74 | 78 | {/if} 79 | {/each} 80 | {/if} 81 |
82 |
83 |
84 | -------------------------------------------------------------------------------- /frontend/src/lib/components/SeedInput.svelte: -------------------------------------------------------------------------------- 1 | 15 | 16 |
17 | 18 | 25 | 26 |
27 | -------------------------------------------------------------------------------- /frontend/src/lib/components/Selectlist.svelte: -------------------------------------------------------------------------------- 1 | 11 | 12 |
13 | 14 | {#if params?.values} 15 | 24 | {/if} 25 |
26 | -------------------------------------------------------------------------------- /frontend/src/lib/components/TextArea.svelte: -------------------------------------------------------------------------------- 1 | 11 | 12 |
13 | 16 |
17 | 23 |
24 |
25 | -------------------------------------------------------------------------------- /frontend/src/lib/components/VideoInput.svelte: -------------------------------------------------------------------------------- 1 | 108 | 109 |
112 |
115 | {#if $mediaDevices.length > 0} 116 |
117 | 118 | 125 |
126 | {/if} 127 | 138 | 142 |
143 |
146 | 151 | 155 | 156 |
157 |
158 | -------------------------------------------------------------------------------- /frontend/src/lib/components/Warning.svelte: -------------------------------------------------------------------------------- 1 | 15 | 16 | {#if message} 17 | 30 | {/if} 31 | 32 | 34 | -------------------------------------------------------------------------------- /frontend/src/lib/icons/aspect.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 11 | 15 | 16 | -------------------------------------------------------------------------------- /frontend/src/lib/icons/expand.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 11 | 15 | 16 | -------------------------------------------------------------------------------- /frontend/src/lib/icons/floppy.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 11 | 15 | 16 | -------------------------------------------------------------------------------- /frontend/src/lib/icons/screen.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 11 | 15 | 16 | -------------------------------------------------------------------------------- /frontend/src/lib/icons/spinner.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 11 | 15 | 16 | -------------------------------------------------------------------------------- /frontend/src/lib/index.ts: -------------------------------------------------------------------------------- 1 | // place files you want to import through the `$lib` alias in this folder. 2 | -------------------------------------------------------------------------------- /frontend/src/lib/lcmLive.ts: -------------------------------------------------------------------------------- 1 | import { get, writable } from "svelte/store"; 2 | 3 | export enum LCMLiveStatus { 4 | CONNECTED = "connected", 5 | DISCONNECTED = "disconnected", 6 | CONNECTING = "connecting", 7 | WAIT = "wait", 8 | SEND_FRAME = "send_frame", 9 | TIMEOUT = "timeout", 10 | ERROR = "error", 11 | } 12 | 13 | const initStatus: LCMLiveStatus = LCMLiveStatus.DISCONNECTED; 14 | 15 | export const lcmLiveStatus = writable(initStatus); 16 | export const streamId = writable(null); 17 | 18 | // WebSocket connection 19 | let websocket: WebSocket | null; 20 | 21 | // Register browser unload event listener to properly close WebSockets 22 | if (typeof window !== "undefined") { 23 | window.addEventListener("beforeunload", () => { 24 | // Close the WebSocket properly if it exists 25 | if (websocket && websocket.readyState === WebSocket.OPEN) { 26 | websocket.close(1000, "Page unload"); 27 | } 28 | }); 29 | } 30 | export const lcmLiveActions = { 31 | async start( 32 | getSreamdata: () => 33 | | [Record] 34 | | [Record, Blob], 35 | ) { 36 | return new Promise((resolve, reject) => { 37 | try { 38 | // Set connecting status immediately 39 | lcmLiveStatus.set(LCMLiveStatus.CONNECTING); 40 | 41 | const userId = crypto.randomUUID(); 42 | const websocketURL = `${ 43 | window.location.protocol === "https:" ? 
"wss" : "ws" 44 | }:${window.location.host}/api/ws/${userId}`; 45 | 46 | // Close any existing connection first 47 | if (websocket && websocket.readyState !== WebSocket.CLOSED) { 48 | websocket.close(); 49 | } 50 | 51 | websocket = new WebSocket(websocketURL); 52 | 53 | // Set a connection timeout 54 | const connectionTimeout = setTimeout(() => { 55 | if (websocket && websocket.readyState !== WebSocket.OPEN) { 56 | console.error("WebSocket connection timeout"); 57 | lcmLiveStatus.set(LCMLiveStatus.ERROR); 58 | streamId.set(null); 59 | reject(new Error("Connection timeout. Please try again.")); 60 | websocket.close(); 61 | } 62 | }, 10000); // 10 second timeout 63 | 64 | websocket.onopen = () => { 65 | clearTimeout(connectionTimeout); 66 | console.log("Connected to websocket"); 67 | }; 68 | 69 | websocket.onclose = (event) => { 70 | clearTimeout(connectionTimeout); 71 | console.log( 72 | `Disconnected from websocket: ${event.code} ${event.reason}`, 73 | ); 74 | 75 | // Only change status if we're not in ERROR state (which would mean we already handled the error) 76 | if (get(lcmLiveStatus) !== LCMLiveStatus.ERROR) { 77 | lcmLiveStatus.set(LCMLiveStatus.DISCONNECTED); 78 | } 79 | 80 | // If connection was never established (close without open) 81 | if (event.code === 1006 && get(streamId) === null) { 82 | reject( 83 | new Error("Cannot connect to server. Please try again later."), 84 | ); 85 | } 86 | }; 87 | 88 | websocket.onerror = (err) => { 89 | clearTimeout(connectionTimeout); 90 | console.error("WebSocket error:", err); 91 | lcmLiveStatus.set(LCMLiveStatus.ERROR); 92 | streamId.set(null); 93 | reject(new Error("Connection error. Please try again.")); 94 | }; 95 | 96 | websocket.onmessage = (event) => { 97 | try { 98 | const data = JSON.parse(event.data); 99 | switch (data.status) { 100 | case "connected": 101 | lcmLiveStatus.set(LCMLiveStatus.CONNECTED); 102 | streamId.set(userId); 103 | resolve({ status: "connected", userId }); 104 | break; 105 | case "send_frame": 106 | lcmLiveStatus.set(LCMLiveStatus.SEND_FRAME); 107 | try { 108 | const streamData = getSreamdata(); 109 | // Send as an object, not a string, to use the proper handling in the send method 110 | this.send({ status: "next_frame" }); 111 | for (const d of streamData) { 112 | this.send(d); 113 | } 114 | } catch (error) { 115 | console.error("Error sending frame data:", error); 116 | } 117 | break; 118 | case "wait": 119 | lcmLiveStatus.set(LCMLiveStatus.WAIT); 120 | break; 121 | case "timeout": 122 | console.log("Session timeout"); 123 | lcmLiveStatus.set(LCMLiveStatus.TIMEOUT); 124 | streamId.set(null); 125 | reject(new Error("Session timeout. 
Please restart.")); 126 | break; 127 | case "error": 128 | console.error("Server error:", data.message); 129 | lcmLiveStatus.set(LCMLiveStatus.ERROR); 130 | streamId.set(null); 131 | reject(new Error(data.message || "Server error occurred")); 132 | break; 133 | default: 134 | console.log("Unknown message status:", data.status); 135 | } 136 | } catch (error) { 137 | console.error("Error handling websocket message:", error); 138 | } 139 | }; 140 | } catch (err) { 141 | console.error("Error initializing websocket:", err); 142 | lcmLiveStatus.set(LCMLiveStatus.ERROR); 143 | streamId.set(null); 144 | reject(err); 145 | } 146 | }); 147 | }, 148 | send(data: Blob | Record) { 149 | try { 150 | if (websocket && websocket.readyState === WebSocket.OPEN) { 151 | if (data instanceof Blob) { 152 | websocket.send(data); 153 | } else { 154 | websocket.send(JSON.stringify(data)); 155 | } 156 | } else { 157 | const readyStateText = websocket 158 | ? ["CONNECTING", "OPEN", "CLOSING", "CLOSED"][websocket.readyState] 159 | : "null"; 160 | console.warn(`WebSocket not ready for sending: ${readyStateText}`); 161 | 162 | // If WebSocket is closed unexpectedly, set status to disconnected 163 | if (!websocket || websocket.readyState === WebSocket.CLOSED) { 164 | lcmLiveStatus.set(LCMLiveStatus.DISCONNECTED); 165 | streamId.set(null); 166 | } 167 | } 168 | } catch (error) { 169 | console.error("Error sending data through WebSocket:", error); 170 | // Handle WebSocket error by forcing disconnection 171 | this.stop(); 172 | } 173 | }, 174 | 175 | async reconnect( 176 | getSreamdata: () => 177 | | [Record] 178 | | [Record, Blob], 179 | ) { 180 | try { 181 | await this.stop(); 182 | // Small delay to ensure clean disconnection before reconnecting 183 | await new Promise((resolve) => setTimeout(resolve, 500)); 184 | return await this.start(getSreamdata); 185 | } catch (error) { 186 | console.error("Reconnection failed:", error); 187 | throw error; 188 | } 189 | }, 190 | 191 | async stop() { 192 | lcmLiveStatus.set(LCMLiveStatus.DISCONNECTED); 193 | try { 194 | if (websocket) { 195 | // Only attempt to close if not already closed 196 | if (websocket.readyState !== WebSocket.CLOSED) { 197 | // Set up onclose handler to clean up only 198 | websocket.onclose = () => { 199 | console.log("WebSocket closed cleanly during stop()"); 200 | }; 201 | 202 | // Set up onerror to be silent during intentional closure 203 | websocket.onerror = () => {}; 204 | 205 | websocket.close(1000, "Client initiated disconnect"); 206 | } 207 | } 208 | } catch (error) { 209 | console.error("Error during WebSocket closure:", error); 210 | } finally { 211 | // Always clean up references 212 | websocket = null; 213 | streamId.set(null); 214 | } 215 | }, 216 | }; 217 | -------------------------------------------------------------------------------- /frontend/src/lib/mediaStream.ts: -------------------------------------------------------------------------------- 1 | import { get, writable, type Writable } from "svelte/store"; 2 | 3 | const BASE_HEIGHT = 720; 4 | export enum MediaStreamStatusEnum { 5 | INIT = "init", 6 | CONNECTED = "connected", 7 | DISCONNECTED = "disconnected", 8 | } 9 | export const onFrameChangeStore: Writable<{ blob: Blob }> = writable({ 10 | blob: new Blob(), 11 | }); 12 | 13 | export const mediaDevices = writable([]); 14 | export const mediaStreamStatus = writable(MediaStreamStatusEnum.INIT); 15 | export const mediaStream = writable(null); 16 | 17 | export const mediaStreamActions = { 18 | async enumerateDevices() { 19 | // 
console.log("Enumerating devices"); 20 | await navigator.mediaDevices 21 | .enumerateDevices() 22 | .then((devices) => { 23 | const cameras = devices.filter( 24 | (device) => device.kind === "videoinput", 25 | ); 26 | mediaDevices.set(cameras); 27 | }) 28 | .catch((err) => { 29 | console.error(err); 30 | }); 31 | }, 32 | async start(mediaDevicedID?: string, aspectRatio: number = 1) { 33 | const constraints = { 34 | audio: false, 35 | video: { 36 | width: { 37 | ideal: BASE_HEIGHT * aspectRatio, 38 | }, 39 | height: { 40 | ideal: BASE_HEIGHT, 41 | }, 42 | deviceId: mediaDevicedID, 43 | }, 44 | }; 45 | 46 | await navigator.mediaDevices 47 | .getUserMedia(constraints) 48 | .then((stream) => { 49 | mediaStreamStatus.set(MediaStreamStatusEnum.CONNECTED); 50 | mediaStream.set(stream); 51 | }) 52 | .catch((err) => { 53 | console.error(`${err.name}: ${err.message}`); 54 | mediaStreamStatus.set(MediaStreamStatusEnum.DISCONNECTED); 55 | mediaStream.set(null); 56 | }); 57 | }, 58 | async startScreenCapture() { 59 | const displayMediaOptions = { 60 | video: { 61 | displaySurface: "window", 62 | }, 63 | audio: false, 64 | surfaceSwitching: "include", 65 | }; 66 | 67 | let captureStream = null; 68 | 69 | try { 70 | captureStream = 71 | await navigator.mediaDevices.getDisplayMedia(displayMediaOptions); 72 | const videoTrack = captureStream.getVideoTracks()[0]; 73 | 74 | console.log("Track settings:"); 75 | console.log(JSON.stringify(videoTrack.getSettings(), null, 2)); 76 | console.log("Track constraints:"); 77 | console.log(JSON.stringify(videoTrack.getConstraints(), null, 2)); 78 | mediaStreamStatus.set(MediaStreamStatusEnum.CONNECTED); 79 | mediaStream.set(captureStream); 80 | 81 | const capabilities = videoTrack.getCapabilities(); 82 | const aspectRatio = capabilities.aspectRatio; 83 | console.log("Aspect Ratio Constraints:", aspectRatio); 84 | } catch (err) { 85 | console.error(err); 86 | } 87 | }, 88 | async switchCamera(mediaDevicedID: string, aspectRatio: number) { 89 | console.log("Switching camera"); 90 | if (get(mediaStreamStatus) !== MediaStreamStatusEnum.CONNECTED) { 91 | return; 92 | } 93 | const constraints = { 94 | audio: false, 95 | video: { 96 | width: { 97 | ideal: BASE_HEIGHT * aspectRatio, 98 | }, 99 | height: { 100 | ideal: BASE_HEIGHT, 101 | }, 102 | deviceId: mediaDevicedID, 103 | }, 104 | }; 105 | console.log("Switching camera", constraints); 106 | await navigator.mediaDevices 107 | .getUserMedia(constraints) 108 | .then((stream) => { 109 | mediaStreamStatus.set(MediaStreamStatusEnum.CONNECTED); 110 | mediaStream.set(stream); 111 | }) 112 | .catch((err) => { 113 | console.error(`${err.name}: ${err.message}`); 114 | }); 115 | }, 116 | async stop() { 117 | navigator.mediaDevices.getUserMedia({ video: true }).then((stream) => { 118 | stream.getTracks().forEach((track) => track.stop()); 119 | }); 120 | mediaStreamStatus.set(MediaStreamStatusEnum.DISCONNECTED); 121 | mediaStream.set(null); 122 | }, 123 | }; 124 | -------------------------------------------------------------------------------- /frontend/src/lib/store.ts: -------------------------------------------------------------------------------- 1 | import { 2 | derived, 3 | get, 4 | writable, 5 | type Readable, 6 | type Writable, 7 | } from "svelte/store"; 8 | 9 | export type PipelineValues = Record; 10 | 11 | export const pipelineValues: Writable = writable({}); 12 | export const deboucedPipelineValues: Readable = derived( 13 | pipelineValues, 14 | ($pipelineValues, set) => { 15 | const debounced = setTimeout(() => { 16 | 
set($pipelineValues); 17 | }, 100); 18 | return () => clearTimeout(debounced); 19 | }, 20 | ); 21 | 22 | export const getPipelineValues = () => get(pipelineValues); 23 | -------------------------------------------------------------------------------- /frontend/src/lib/types.ts: -------------------------------------------------------------------------------- 1 | export const enum FieldType { 2 | RANGE = "range", 3 | SEED = "seed", 4 | TEXTAREA = "textarea", 5 | CHECKBOX = "checkbox", 6 | SELECT = "select", 7 | } 8 | export const enum PipelineMode { 9 | IMAGE = "image", 10 | VIDEO = "video", 11 | TEXT = "text", 12 | } 13 | 14 | export interface Fields { 15 | [key: string]: FieldProps; 16 | } 17 | 18 | export interface FieldProps { 19 | default: number | string; 20 | max?: number; 21 | min?: number; 22 | title: string; 23 | field: FieldType; 24 | step?: number; 25 | disabled?: boolean; 26 | hide?: boolean; 27 | id: string; 28 | values?: string[]; 29 | } 30 | export interface PipelineInfo { 31 | title: { 32 | default: string; 33 | }; 34 | name: string; 35 | description: string; 36 | input_mode: { 37 | default: PipelineMode; 38 | }; 39 | } 40 | -------------------------------------------------------------------------------- /frontend/src/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import * as piexif from "piexifjs"; 2 | 3 | export interface IImageInfo { 4 | prompt?: string; 5 | negative_prompt?: string; 6 | seed?: number; 7 | guidance_scale?: number; 8 | } 9 | 10 | export enum windowType { 11 | image = "image", 12 | } 13 | 14 | export function snapImage(imageEl: HTMLImageElement, info: IImageInfo) { 15 | try { 16 | const zeroth: { [key: string]: string | number } = {}; 17 | const exif: { [key: string]: string | number } = {}; 18 | const gps: { [key: string]: string | number } = {}; 19 | zeroth[piexif.ImageIFD.Make] = "LCM Image-to-Image ControNet"; 20 | zeroth[piexif.ImageIFD.ImageDescription] = 21 | `prompt: ${info?.prompt} | negative_prompt: ${info?.negative_prompt} | seed: ${info?.seed} | guidance_scale: ${info?.guidance_scale}`; 22 | zeroth[piexif.ImageIFD.Software] = 23 | "https://github.com/radames/Real-Time-Latent-Consistency-Model"; 24 | exif[piexif.ExifIFD.DateTimeOriginal] = new Date().toISOString(); 25 | 26 | const exifObj = { "0th": zeroth, Exif: exif, GPS: gps }; 27 | const exifBytes = piexif.dump(exifObj); 28 | 29 | const canvas = document.createElement("canvas"); 30 | canvas.width = imageEl.naturalWidth; 31 | canvas.height = imageEl.naturalHeight; 32 | const ctx = canvas.getContext("2d") as CanvasRenderingContext2D; 33 | ctx.drawImage(imageEl, 0, 0); 34 | const dataURL = canvas.toDataURL("image/jpeg"); 35 | const withExif = piexif.insert(exifBytes, dataURL); 36 | 37 | const a = document.createElement("a"); 38 | a.href = withExif; 39 | a.download = `lcm_txt_2_img${Date.now()}.png`; 40 | a.click(); 41 | } catch (err) { 42 | console.log(err); 43 | } 44 | } 45 | 46 | export function expandWindow(streamURL: string) { 47 | const newWindow = window.open( 48 | "", 49 | "_blank", 50 | "width=1024,height=1024,scrollbars=0,resizable=1,toolbar=0,menubar=0,location=0,directories=0,status=0", 51 | ) as Window; 52 | 53 | const html = ` 54 | 55 | 56 | Real-Time Latent Consistency Model 57 | 64 | 65 | 66 | 85 | 86 | 87 | `; 88 | newWindow.document.write(html); 89 | 90 | const img = newWindow.document.createElement("img"); 91 | img.src = streamURL; 92 | img.style.width = "100%"; 93 | img.style.height = "100%"; 94 | img.style.objectFit = 
"contain"; 95 | newWindow.document.body.appendChild(img); 96 | 97 | return newWindow; 98 | } 99 | -------------------------------------------------------------------------------- /frontend/src/piexifjs.d.ts: -------------------------------------------------------------------------------- 1 | declare module "piexifjs" { 2 | export const ImageIFD: { 3 | Make: number; 4 | ImageDescription: number; 5 | Software: number; 6 | }; 7 | export const ExifIFD: { 8 | DateTimeOriginal: number; 9 | }; 10 | export function dump(exifObj: Record): string; 11 | export function insert(exifBytes: string, dataURL: string): string; 12 | } 13 | -------------------------------------------------------------------------------- /frontend/src/routes/+layout.svelte: -------------------------------------------------------------------------------- 1 | 6 | 7 | {@render children()} 8 | -------------------------------------------------------------------------------- /frontend/src/routes/+page.svelte: -------------------------------------------------------------------------------- 1 | 146 | 147 | 148 | 151 | 152 | 153 |
154 | 155 |
156 | {#if pageContent} 157 | 158 | {@html pageContent} 159 | {/if} 160 | {#if maxQueueSize > 0} 161 |

162 | There are {currentQueueSize} 165 | user(s) sharing the same GPU, affecting real-time performance. Maximum queue 166 | size is {maxQueueSize}. 167 | Duplicate and run it on your own GPU. 172 |

173 | {/if} 174 | 175 | {#if $lcmLiveStatus === LCMLiveStatus.ERROR} 176 |

177 | 184 |

185 | {/if} 186 |
187 | {#if pipelineParams} 188 |
189 | {#if isImageMode} 190 |
191 | 195 |
196 | {/if} 197 |
198 | 199 |
200 |
201 | 210 | 211 |
212 |
213 | {:else} 214 | 215 |
216 | 217 |

Loading...

218 |
219 | {/if} 220 |
221 | 222 | 228 | -------------------------------------------------------------------------------- /frontend/src/routes/+page.ts: -------------------------------------------------------------------------------- 1 | export const prerender = true; 2 | -------------------------------------------------------------------------------- /frontend/static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/frontend/static/favicon.png -------------------------------------------------------------------------------- /frontend/svelte.config.js: -------------------------------------------------------------------------------- 1 | import adapter from "@sveltejs/adapter-static"; 2 | import { vitePreprocess } from "@sveltejs/vite-plugin-svelte"; 3 | 4 | const config = { 5 | preprocess: vitePreprocess(), 6 | kit: { 7 | adapter: adapter({ 8 | pages: "public", 9 | assets: "public", 10 | fallback: undefined, 11 | precompress: false, 12 | strict: true, 13 | }), 14 | }, 15 | }; 16 | 17 | export default config; 18 | -------------------------------------------------------------------------------- /frontend/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: ["./src/**/*.{html,js,svelte,ts}"], 4 | theme: { 5 | extend: {}, 6 | }, 7 | plugins: [import("@tailwindcss/typography")], 8 | }; 9 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./.svelte-kit/tsconfig.json", 3 | "compilerOptions": { 4 | "allowJs": true, 5 | "checkJs": true, 6 | "esModuleInterop": true, 7 | "forceConsistentCasingInFileNames": true, 8 | "resolveJsonModule": true, 9 | "skipLibCheck": true, 10 | "sourceMap": true, 11 | "strict": true, 12 | "moduleResolution": "bundler" 13 | } 14 | // Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias 15 | // except $lib which is handled by https://svelte.dev/docs/kit/configuration#files 16 | // 17 | // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes 18 | // from the referenced tsconfig.json - TypeScript does not merge them in 19 | } 20 | -------------------------------------------------------------------------------- /frontend/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { sveltekit } from "@sveltejs/kit/vite"; 2 | import tailwindcss from "@tailwindcss/vite"; 3 | import { defineConfig } from "vite"; 4 | 5 | export default defineConfig({ 6 | plugins: [tailwindcss(), sveltekit()], 7 | server: { 8 | proxy: { 9 | "/api": "http://localhost:7860", 10 | "/api/ws": { 11 | target: "ws://localhost:7860", 12 | ws: true, 13 | }, 14 | }, 15 | }, 16 | }); 17 | -------------------------------------------------------------------------------- /qr-code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/qr-code.png -------------------------------------------------------------------------------- /server/config.py: -------------------------------------------------------------------------------- 1 | from 
pydantic import BaseModel, field_validator 2 | import argparse 3 | import os 4 | from typing import Annotated 5 | 6 | 7 | class Args(BaseModel): 8 | host: str 9 | port: int 10 | reload: bool 11 | max_queue_size: int 12 | timeout: float 13 | safety_checker: bool 14 | torch_compile: bool 15 | taesd: bool 16 | pipeline: str 17 | ssl_certfile: str | None 18 | ssl_keyfile: str | None 19 | sfast: bool 20 | onediff: bool = False 21 | compel: bool = False 22 | debug: bool = False 23 | pruna: bool = False 24 | 25 | def pretty_print(self) -> None: 26 | print("\n") 27 | for field, value in self.model_dump().items(): 28 | print(f"{field}: {value}") 29 | print("\n") 30 | 31 | @field_validator("ssl_keyfile") 32 | @classmethod 33 | def validate_ssl_keyfile(cls, v: str | None, info) -> str | None: 34 | """Validate that if ssl_certfile is provided, ssl_keyfile is also provided.""" 35 | ssl_certfile = info.data.get("ssl_certfile") 36 | if ssl_certfile and not v: 37 | raise ValueError( 38 | "If ssl_certfile is provided, ssl_keyfile must also be provided" 39 | ) 40 | return v 41 | 42 | 43 | MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0)) 44 | TIMEOUT = float(os.environ.get("TIMEOUT", 0)) 45 | SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None) == "True" 46 | TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None) == "True" 47 | USE_TAESD = os.environ.get("USE_TAESD", "False") == "True" 48 | default_host = os.getenv("HOST", "0.0.0.0") 49 | default_port = int(os.getenv("PORT", "7860")) 50 | 51 | parser = argparse.ArgumentParser(description="Run the app") 52 | parser.add_argument("--host", type=str, default=default_host, help="Host address") 53 | parser.add_argument("--port", type=int, default=default_port, help="Port number") 54 | parser.add_argument("--reload", action="store_true", help="Reload code on change") 55 | parser.add_argument( 56 | "--max-queue-size", 57 | dest="max_queue_size", 58 | type=int, 59 | default=MAX_QUEUE_SIZE, 60 | help="Max Queue Size", 61 | ) 62 | parser.add_argument("--timeout", type=float, default=TIMEOUT, help="Timeout") 63 | parser.add_argument( 64 | "--safety-checker", 65 | dest="safety_checker", 66 | action="store_true", 67 | default=SAFETY_CHECKER, 68 | help="Safety Checker", 69 | ) 70 | parser.add_argument( 71 | "--torch-compile", 72 | dest="torch_compile", 73 | action="store_true", 74 | default=TORCH_COMPILE, 75 | help="Torch Compile", 76 | ) 77 | parser.add_argument( 78 | "--taesd", 79 | dest="taesd", 80 | action="store_true", 81 | help="Use Tiny Autoencoder", 82 | ) 83 | parser.add_argument( 84 | "--pipeline", 85 | type=str, 86 | default="txt2img", 87 | help="Pipeline to use", 88 | ) 89 | parser.add_argument( 90 | "--ssl-certfile", 91 | dest="ssl_certfile", 92 | type=str, 93 | default=None, 94 | help="SSL certfile", 95 | ) 96 | parser.add_argument( 97 | "--ssl-keyfile", 98 | dest="ssl_keyfile", 99 | type=str, 100 | default=None, 101 | help="SSL keyfile", 102 | ) 103 | parser.add_argument( 104 | "--debug", 105 | action="store_true", 106 | default=False, 107 | help="Debug", 108 | ) 109 | parser.add_argument( 110 | "--compel", 111 | action="store_true", 112 | default=False, 113 | help="Compel", 114 | ) 115 | parser.add_argument( 116 | "--sfast", 117 | action="store_true", 118 | default=False, 119 | help="Enable Stable Fast", 120 | ) 121 | parser.add_argument( 122 | "--onediff", 123 | action="store_true", 124 | default=False, 125 | help="Enable OneDiff", 126 | ) 127 | parser.add_argument( 128 | "--pruna", 129 | action="store_true", 130 | default=False, 131 | 
help="Enable Pruna", 132 | ) 133 | parser.set_defaults(taesd=USE_TAESD) 134 | 135 | config = Args.model_validate(vars(parser.parse_args())) 136 | config.pretty_print() 137 | -------------------------------------------------------------------------------- /server/device.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # check if MPS is available OSX only M1/M2/M3 chips 4 | mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available() 5 | xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available() 6 | device = torch.device( 7 | "cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu" 8 | ) 9 | torch_dtype = torch.float16 10 | if mps_available: 11 | device = torch.device("mps") 12 | torch_dtype = torch.float32 13 | -------------------------------------------------------------------------------- /server/pipelines/IPcompositionHyperSD15.py: -------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | DiffusionPipeline, 3 | TCDScheduler, 4 | ) 5 | from compel import Compel 6 | import torch 7 | from transformers import CLIPVisionModelWithProjection 8 | from huggingface_hub import hf_hub_download 9 | 10 | try: 11 | import intel_extension_for_pytorch as ipex # type: ignore 12 | except: 13 | pass 14 | 15 | from config import Args 16 | from pydantic import BaseModel, Field 17 | from util import ParamsModel 18 | from PIL import Image 19 | 20 | model_id = "runwayml/stable-diffusion-v1-5" 21 | ip_adapter_model = "ostris/ip-composition-adapter" 22 | file_name = "ip_plus_composition_sd15.safetensors" 23 | 24 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece" 25 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated" 26 | page_content = """ 27 |

Hyper-SD Unified + IP Adapter Composition

28 |

Image-to-Image ControlNet

29 | 30 | """ 31 | 32 | 33 | class Pipeline: 34 | class Info(BaseModel): 35 | name: str = "controlnet+SDXL+Turbo" 36 | title: str = "SDXL Turbo + Controlnet" 37 | description: str = "Generates an image from a text prompt" 38 | input_mode: str = "image" 39 | page_content: str = page_content 40 | 41 | class InputParams(ParamsModel): 42 | prompt: str = Field( 43 | default_prompt, 44 | title="Prompt", 45 | field="textarea", 46 | id="prompt", 47 | ) 48 | negative_prompt: str = Field( 49 | default_negative_prompt, 50 | title="Negative Prompt", 51 | field="textarea", 52 | id="negative_prompt", 53 | hide=True, 54 | ) 55 | seed: int = Field( 56 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 57 | ) 58 | steps: int = Field( 59 | 2, min=1, max=15, title="Steps", field="range", hide=True, id="steps" 60 | ) 61 | width: int = Field( 62 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 63 | ) 64 | height: int = Field( 65 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 66 | ) 67 | guidance_scale: float = Field( 68 | 0.0, 69 | min=0, 70 | max=10, 71 | step=0.001, 72 | title="Guidance Scale", 73 | field="range", 74 | hide=True, 75 | id="guidance_scale", 76 | ) 77 | ip_adapter_scale: float = Field( 78 | 0.8, 79 | min=0.0, 80 | max=1.0, 81 | step=0.001, 82 | title="IP Adapter Scale", 83 | field="range", 84 | hide=True, 85 | id="ip_adapter_scale", 86 | ) 87 | eta: float = Field( 88 | 1.0, 89 | min=0, 90 | max=1.0, 91 | step=0.001, 92 | title="Eta", 93 | field="range", 94 | hide=True, 95 | id="eta", 96 | ) 97 | 98 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 99 | image_encoder = CLIPVisionModelWithProjection.from_pretrained( 100 | "h94/IP-Adapter", 101 | subfolder="models/image_encoder", 102 | torch_dtype=torch.float16, 103 | ).to(device) 104 | 105 | self.pipe = DiffusionPipeline.from_pretrained( 106 | model_id, 107 | safety_checker=None, 108 | torch_dtype=torch_dtype, 109 | image_encoder=image_encoder, 110 | variant="fp16", 111 | ) 112 | 113 | self.pipe.load_ip_adapter( 114 | ip_adapter_model, 115 | subfolder="", 116 | weight_name=[file_name], 117 | image_encoder_folder=None, 118 | ) 119 | 120 | self.pipe.load_lora_weights( 121 | hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-1step-lora.safetensors") 122 | ) 123 | self.pipe.fuse_lora() 124 | 125 | self.pipe.scheduler = TCDScheduler.from_config(self.pipe.scheduler.config) 126 | self.pipe.set_ip_adapter_scale([0.8]) 127 | 128 | # if args.compile: 129 | # pipe.unet = oneflow_compile(pipe.unet, options=compile_options) 130 | # pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=compile_options) 131 | 132 | if args.sfast: 133 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 134 | compile, 135 | CompilationConfig, 136 | ) 137 | 138 | config = CompilationConfig.Default() 139 | # config.enable_xformers = True 140 | config.enable_triton = True 141 | config.enable_cuda_graph = True 142 | # cofig. 
143 | self.pipe = compile(self.pipe, config=config) 144 | 145 | self.pipe.set_progress_bar_config(disable=True) 146 | self.pipe.to(device=device) 147 | if device.type != "mps": 148 | self.pipe.unet.to(memory_format=torch.channels_last) 149 | 150 | if args.compel: 151 | self.compel_proc = Compel( 152 | tokenizer=self.pipe.tokenizer, 153 | text_encoder=self.pipe.text_encoder, 154 | truncate_long_prompts=False, 155 | ) 156 | 157 | if args.torch_compile: 158 | self.pipe.unet = torch.compile( 159 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 160 | ) 161 | self.pipe.vae = torch.compile( 162 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 163 | ) 164 | self.pipe( 165 | prompt="warmup", 166 | image=[Image.new("RGB", (768, 768))], 167 | ) 168 | 169 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 170 | generator = torch.manual_seed(params.seed) 171 | self.pipe.set_ip_adapter_scale([params.ip_adapter_scale]) 172 | 173 | prompt_embeds = None 174 | prompt = params.prompt 175 | if hasattr(self, "compel_proc"): 176 | prompt_embeds = self.compel_proc(prompt) 177 | prompt = None 178 | 179 | steps = params.steps 180 | 181 | results = self.pipe( 182 | prompt=prompt, 183 | prompt_embeds=prompt_embeds, 184 | generator=generator, 185 | num_inference_steps=steps, 186 | guidance_scale=params.guidance_scale, 187 | width=params.width, 188 | eta=params.eta, 189 | height=params.height, 190 | ip_adapter_image=[params.image], 191 | output_type="pil", 192 | ) 193 | 194 | return results.images[0] 195 | -------------------------------------------------------------------------------- /server/pipelines/IPcompositionHyperSDXL.py: -------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | StableDiffusionXLPipeline, 3 | AutoencoderKL, 4 | TCDScheduler, 5 | ) 6 | from compel import Compel, ReturnedEmbeddingsType 7 | import torch 8 | from transformers import CLIPVisionModelWithProjection 9 | from huggingface_hub import hf_hub_download 10 | 11 | try: 12 | import intel_extension_for_pytorch as ipex # type: ignore 13 | except: 14 | pass 15 | 16 | from config import Args 17 | from pydantic import BaseModel, Field 18 | from util import ParamsModel 19 | from PIL import Image 20 | 21 | model_id = "stabilityai/stable-diffusion-xl-base-1.0" 22 | taesd_model = "madebyollin/taesdxl" 23 | ip_adapter_model = "ostris/ip-composition-adapter" 24 | file_name = "ip_plus_composition_sdxl.safetensors" 25 | 26 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece" 27 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated" 28 | page_content = """ 29 |

Hyper-SDXL Unified + IP Adapter Composition

30 |

Image-to-Image IP-Adapter Composition

31 | 32 | """ 33 | 34 | 35 | class Pipeline: 36 | class Info(BaseModel): 37 | name: str = "controlnet+SDXL+Turbo" 38 | title: str = "SDXL Turbo + Controlnet" 39 | description: str = "Generates an image from a text prompt" 40 | input_mode: str = "image" 41 | page_content: str = page_content 42 | 43 | class InputParams(ParamsModel): 44 | prompt: str = Field( 45 | default_prompt, 46 | title="Prompt", 47 | field="textarea", 48 | id="prompt", 49 | ) 50 | negative_prompt: str = Field( 51 | default_negative_prompt, 52 | title="Negative Prompt", 53 | field="textarea", 54 | id="negative_prompt", 55 | hide=True, 56 | ) 57 | seed: int = Field( 58 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 59 | ) 60 | steps: int = Field( 61 | 2, min=1, max=15, title="Steps", field="range", hide=True, id="steps" 62 | ) 63 | width: int = Field( 64 | 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 65 | ) 66 | height: int = Field( 67 | 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 68 | ) 69 | guidance_scale: float = Field( 70 | 0.0, 71 | min=0, 72 | max=10, 73 | step=0.001, 74 | title="Guidance Scale", 75 | field="range", 76 | hide=True, 77 | id="guidance_scale", 78 | ) 79 | ip_adapter_scale: float = Field( 80 | 0.8, 81 | min=0.0, 82 | max=1.0, 83 | step=0.001, 84 | title="IP Adapter Scale", 85 | field="range", 86 | hide=True, 87 | id="ip_adapter_scale", 88 | ) 89 | eta: float = Field( 90 | 1.0, 91 | min=0, 92 | max=1.0, 93 | step=0.001, 94 | title="Eta", 95 | field="range", 96 | hide=True, 97 | id="eta", 98 | ) 99 | 100 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 101 | vae = AutoencoderKL.from_pretrained( 102 | "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype 103 | ) 104 | image_encoder = CLIPVisionModelWithProjection.from_pretrained( 105 | "h94/IP-Adapter", 106 | subfolder="models/image_encoder", 107 | torch_dtype=torch.float16, 108 | ).to(device) 109 | 110 | self.pipe = StableDiffusionXLPipeline.from_pretrained( 111 | model_id, 112 | safety_checker=None, 113 | torch_dtype=torch_dtype, 114 | vae=vae, 115 | image_encoder=image_encoder, 116 | variant="fp16", 117 | ) 118 | self.pipe.load_ip_adapter( 119 | ip_adapter_model, 120 | subfolder="", 121 | weight_name=[file_name], 122 | image_encoder_folder=None, 123 | ) 124 | 125 | self.pipe.load_lora_weights( 126 | hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-1step-lora.safetensors") 127 | ) 128 | self.pipe.fuse_lora() 129 | 130 | self.pipe.scheduler = TCDScheduler.from_config(self.pipe.scheduler.config) 131 | self.pipe.set_ip_adapter_scale([0.8]) 132 | 133 | if args.sfast: 134 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 135 | compile, 136 | CompilationConfig, 137 | ) 138 | 139 | config = CompilationConfig.Default() 140 | # config.enable_xformers = True 141 | config.enable_triton = True 142 | config.enable_cuda_graph = True 143 | self.pipe = compile(self.pipe, config=config) 144 | 145 | self.pipe.set_progress_bar_config(disable=True) 146 | self.pipe.to(device=device) 147 | if device.type != "mps": 148 | self.pipe.unet.to(memory_format=torch.channels_last) 149 | 150 | if args.compel: 151 | self.pipe.compel_proc = Compel( 152 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2], 153 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], 154 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, 155 | requires_pooled=[False, True], 156 | ) 157 | 158 | if args.torch_compile: 159 | 
self.pipe.unet = torch.compile( 160 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 161 | ) 162 | self.pipe.vae = torch.compile( 163 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 164 | ) 165 | self.pipe( 166 | prompt="warmup", 167 | image=[Image.new("RGB", (768, 768))], 168 | ) 169 | 170 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 171 | generator = torch.manual_seed(params.seed) 172 | self.pipe.set_ip_adapter_scale([params.ip_adapter_scale]) 173 | 174 | prompt = params.prompt 175 | negative_prompt = params.negative_prompt 176 | prompt_embeds = None 177 | pooled_prompt_embeds = None 178 | negative_prompt_embeds = None 179 | negative_pooled_prompt_embeds = None 180 | if hasattr(self.pipe, "compel_proc"): 181 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc( 182 | [params.prompt, params.negative_prompt] 183 | ) 184 | prompt = None 185 | negative_prompt = None 186 | prompt_embeds = _prompt_embeds[0:1] 187 | pooled_prompt_embeds = pooled_prompt_embeds[0:1] 188 | negative_prompt_embeds = _prompt_embeds[1:2] 189 | negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2] 190 | 191 | steps = params.steps 192 | 193 | results = self.pipe( 194 | prompt=prompt, 195 | negative_prompt=negative_prompt, 196 | prompt_embeds=prompt_embeds, 197 | pooled_prompt_embeds=pooled_prompt_embeds, 198 | negative_prompt_embeds=negative_prompt_embeds, 199 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, 200 | generator=generator, 201 | num_inference_steps=steps, 202 | guidance_scale=params.guidance_scale, 203 | width=params.width, 204 | eta=params.eta, 205 | height=params.height, 206 | ip_adapter_image=[params.image], 207 | output_type="pil", 208 | ) 209 | return results.images[0] 210 | -------------------------------------------------------------------------------- /server/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/server/pipelines/__init__.py -------------------------------------------------------------------------------- /server/pipelines/controlnetFlashSD.py: -------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | StableDiffusionControlNetImg2ImgPipeline, 3 | ControlNetModel, 4 | AutoencoderTiny, 5 | LCMScheduler, 6 | ) 7 | from compel import Compel, ReturnedEmbeddingsType 8 | import torch 9 | from pipelines.utils.canny_gpu import SobelOperator 10 | 11 | try: 12 | import intel_extension_for_pytorch as ipex # type: ignore 13 | except: 14 | pass 15 | 16 | from config import Args 17 | from pydantic import BaseModel, Field 18 | from util import ParamsModel 19 | from PIL import Image 20 | import math 21 | 22 | controlnet_model = "lllyasviel/control_v11p_sd15_canny" 23 | model_id = "runwayml/stable-diffusion-v1-5" 24 | taesd_model = "madebyollin/taesd" 25 | 26 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece" 27 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated" 28 | page_content = """ 29 |

Flash-SD

30 |

Image-to-Image ControlNet
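The ControlNet pipelines below (controlnetFlashSD, controlnetPCMSD15) build their canny-style control image on the GPU with SobelOperator from pipelines/utils/canny_gpu.py, which is not included in this excerpt. The following is a minimal sketch of what such an operator can look like, using Sobel gradients plus simple low/high thresholding in place of full Canny hysteresis; the repo's actual implementation may differ.

```python
# Illustrative sketch only: the repo's SobelOperator (pipelines/utils/canny_gpu.py) is not
# shown in this excerpt and may differ. This approximates a GPU edge map from Sobel
# gradients with simple low/high thresholding instead of full Canny hysteresis.
import torch
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from PIL import Image


def sobel_edge_map(image: Image.Image, low: float, high: float, device: str = "cuda") -> Image.Image:
    gray = TF.to_tensor(image.convert("L")).unsqueeze(0).to(device)  # 1x1xHxW in [0, 1]
    kx = torch.tensor(
        [[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]], device=device
    ).view(1, 1, 3, 3)
    ky = kx.transpose(2, 3)
    gx = F.conv2d(gray, kx, padding=1)
    gy = F.conv2d(gray, ky, padding=1)
    mag = torch.sqrt(gx * gx + gy * gy)
    mag = mag / mag.max().clamp(min=1e-6)        # normalize magnitudes to [0, 1]
    edges = torch.zeros_like(mag)
    edges[(mag >= low) & (mag < high)] = 0.5     # weak edges kept at half intensity
    edges[mag >= high] = 1.0                     # strong edges kept
    return TF.to_pil_image((edges.squeeze(0) * 255).to(torch.uint8).cpu())
```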

31 | 32 | """ 33 | 34 | 35 | class Pipeline: 36 | class Info(BaseModel): 37 | name: str = "controlnet+SDXL+Turbo" 38 | title: str = "SDXL Turbo + Controlnet" 39 | description: str = "Generates an image from a text prompt" 40 | input_mode: str = "image" 41 | page_content: str = page_content 42 | 43 | class InputParams(ParamsModel): 44 | prompt: str = Field( 45 | default_prompt, 46 | title="Prompt", 47 | field="textarea", 48 | id="prompt", 49 | ) 50 | negative_prompt: str = Field( 51 | default_negative_prompt, 52 | title="Negative Prompt", 53 | field="textarea", 54 | id="negative_prompt", 55 | hide=True, 56 | ) 57 | seed: int = Field( 58 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 59 | ) 60 | steps: int = Field( 61 | 2, min=1, max=15, title="Steps", field="range", hide=True, id="steps" 62 | ) 63 | width: int = Field( 64 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 65 | ) 66 | height: int = Field( 67 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 68 | ) 69 | strength: float = Field( 70 | 0.5, 71 | min=0.25, 72 | max=1.0, 73 | step=0.001, 74 | title="Strength", 75 | field="range", 76 | hide=True, 77 | id="strength", 78 | ) 79 | controlnet_scale: float = Field( 80 | 0.5, 81 | min=0, 82 | max=1.0, 83 | step=0.001, 84 | title="Controlnet Scale", 85 | field="range", 86 | hide=True, 87 | id="controlnet_scale", 88 | ) 89 | controlnet_start: float = Field( 90 | 0.0, 91 | min=0, 92 | max=1.0, 93 | step=0.001, 94 | title="Controlnet Start", 95 | field="range", 96 | hide=True, 97 | id="controlnet_start", 98 | ) 99 | controlnet_end: float = Field( 100 | 1.0, 101 | min=0, 102 | max=1.0, 103 | step=0.001, 104 | title="Controlnet End", 105 | field="range", 106 | hide=True, 107 | id="controlnet_end", 108 | ) 109 | canny_low_threshold: float = Field( 110 | 0.31, 111 | min=0, 112 | max=1.0, 113 | step=0.001, 114 | title="Canny Low Threshold", 115 | field="range", 116 | hide=True, 117 | id="canny_low_threshold", 118 | ) 119 | canny_high_threshold: float = Field( 120 | 0.125, 121 | min=0, 122 | max=1.0, 123 | step=0.001, 124 | title="Canny High Threshold", 125 | field="range", 126 | hide=True, 127 | id="canny_high_threshold", 128 | ) 129 | debug_canny: bool = Field( 130 | False, 131 | title="Debug Canny", 132 | field="checkbox", 133 | hide=True, 134 | id="debug_canny", 135 | ) 136 | 137 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 138 | controlnet_canny = ControlNetModel.from_pretrained( 139 | controlnet_model, torch_dtype=torch_dtype 140 | ) 141 | 142 | self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained( 143 | model_id, 144 | safety_checker=None, 145 | controlnet=controlnet_canny, 146 | torch_dtype=torch_dtype, 147 | ) 148 | 149 | self.pipe.scheduler = LCMScheduler.from_pretrained( 150 | model_id, 151 | subfolder="scheduler", 152 | timestep_spacing="trailing", 153 | ) 154 | 155 | if args.taesd: 156 | self.pipe.vae = AutoencoderTiny.from_pretrained( 157 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 158 | ) 159 | self.pipe.load_lora_weights("jasperai/flash-sd") 160 | self.pipe.fuse_lora() 161 | 162 | self.canny_torch = SobelOperator(device=device) 163 | 164 | if args.sfast: 165 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 166 | compile, 167 | CompilationConfig, 168 | ) 169 | 170 | config = CompilationConfig.Default() 171 | # config.enable_xformers = True 172 | config.enable_triton = True 173 | config.enable_cuda_graph = True 174 | self.pipe = 
compile(self.pipe, config=config) 175 | 176 | self.pipe.set_progress_bar_config(disable=True) 177 | self.pipe.to(device=device) 178 | if device.type != "mps": 179 | self.pipe.unet.to(memory_format=torch.channels_last) 180 | 181 | if args.compel: 182 | self.pipe.compel_proc = Compel( 183 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2], 184 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], 185 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, 186 | requires_pooled=[False, True], 187 | ) 188 | 189 | if args.torch_compile: 190 | self.pipe.unet = torch.compile( 191 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 192 | ) 193 | self.pipe.vae = torch.compile( 194 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 195 | ) 196 | self.pipe( 197 | prompt="warmup", 198 | image=[Image.new("RGB", (768, 768))], 199 | control_image=[Image.new("RGB", (768, 768))], 200 | ) 201 | 202 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 203 | generator = torch.manual_seed(params.seed) 204 | 205 | prompt = params.prompt 206 | negative_prompt = params.negative_prompt 207 | prompt_embeds = None 208 | pooled_prompt_embeds = None 209 | negative_prompt_embeds = None 210 | negative_pooled_prompt_embeds = None 211 | if hasattr(self.pipe, "compel_proc"): 212 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc( 213 | [params.prompt, params.negative_prompt] 214 | ) 215 | prompt = None 216 | negative_prompt = None 217 | prompt_embeds = _prompt_embeds[0:1] 218 | pooled_prompt_embeds = pooled_prompt_embeds[0:1] 219 | negative_prompt_embeds = _prompt_embeds[1:2] 220 | negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2] 221 | 222 | control_image = self.canny_torch( 223 | params.image, params.canny_low_threshold, params.canny_high_threshold 224 | ) 225 | steps = params.steps 226 | strength = params.strength 227 | if int(steps * strength) < 1: 228 | steps = math.ceil(1 / max(0.10, strength)) 229 | 230 | results = self.pipe( 231 | image=params.image, 232 | control_image=control_image, 233 | prompt=prompt, 234 | negative_prompt=negative_prompt, 235 | prompt_embeds=prompt_embeds, 236 | pooled_prompt_embeds=pooled_prompt_embeds, 237 | negative_prompt_embeds=negative_prompt_embeds, 238 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, 239 | generator=generator, 240 | strength=strength, 241 | num_inference_steps=steps, 242 | guidance_scale=0, 243 | width=params.width, 244 | height=params.height, 245 | output_type="pil", 246 | controlnet_conditioning_scale=params.controlnet_scale, 247 | control_guidance_start=params.controlnet_start, 248 | control_guidance_end=params.controlnet_end, 249 | ) 250 | 251 | result_image = results.images[0] 252 | if params.debug_canny: 253 | # paste control_image on top of result_image 254 | w0, h0 = (200, 200) 255 | control_image = control_image.resize((w0, h0)) 256 | w1, h1 = result_image.size 257 | result_image.paste(control_image, (w1 - w0, h1 - h0)) 258 | 259 | return result_image 260 | -------------------------------------------------------------------------------- /server/pipelines/controlnetLoraSD15QRCode.py: -------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | StableDiffusionControlNetImg2ImgPipeline, 3 | ControlNetModel, 4 | LCMScheduler, 5 | AutoencoderTiny, 6 | ) 7 | from compel import Compel 8 | import torch 9 | 10 | try: 11 | import intel_extension_for_pytorch as ipex # type: ignore 12 | except: 13 | pass 14 | 
15 | import psutil 16 | from config import Args 17 | from pydantic import BaseModel, Field 18 | from util import ParamsModel 19 | from PIL import Image 20 | import math 21 | 22 | taesd_model = "madebyollin/taesd" 23 | controlnet_model = "monster-labs/control_v1p_sd15_qrcode_monster" 24 | base_model = "nitrosocke/mo-di-diffusion" 25 | lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5" 26 | default_prompt = "abstract art of a men with curly hair by Pablo Picasso" 27 | page_content = """ 28 |

Real-Time Latent Consistency Model SDv1.5

29 |

LCM + LoRA + Controlnet + QRCode

30 |

31 | This demo showcases 32 | LCM LoRA 36 | + ControlNet + Image to Image pipeline using 37 | Diffusers with an MJPEG stream server. 42 |

43 |

44 | Change the prompt to generate different images; Compel syntax is accepted. 49 |
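The note above says prompts accept Compel syntax; several pipelines in this directory build a Compel processor with truncate_long_prompts=False and pass its embeddings to the pipeline. A quick illustration of that syntax follows; the weights and the pipe / init_image names are examples, not values taken from this repo.

```python
# Example of Compel prompt weighting (illustrative values; `pipe` and `init_image` are
# assumed to be an SD1.5 img2img pipeline and a PIL image, as in the pipelines here).
from compel import Compel

compel_proc = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    truncate_long_prompts=False,
)
# "+" / "-" nudge a token's weight up or down; "(phrase)1.3" sets a weight explicitly.
prompt_embeds = compel_proc(
    "abstract art of a man with (curly hair)1.3 by Pablo Picasso, smiling--"
)
image = pipe(prompt_embeds=prompt_embeds, image=init_image, strength=0.6).images[0]
```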

50 | """ 51 | 52 | 53 | class Pipeline: 54 | class Info(BaseModel): 55 | name: str = "controlnet+loras+sd15" 56 | title: str = "LCM + LoRA + Controlnet" 57 | description: str = "Generates an image from a text prompt" 58 | input_mode: str = "image" 59 | page_content: str = page_content 60 | 61 | class InputParams(ParamsModel): 62 | prompt: str = Field( 63 | default_prompt, 64 | title="Prompt", 65 | field="textarea", 66 | id="prompt", 67 | ) 68 | seed: int = Field( 69 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 70 | ) 71 | steps: int = Field( 72 | 5, min=1, max=15, title="Steps", field="range", hide=True, id="steps" 73 | ) 74 | width: int = Field( 75 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 76 | ) 77 | height: int = Field( 78 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 79 | ) 80 | guidance_scale: float = Field( 81 | 1.0, 82 | min=0, 83 | max=2, 84 | step=0.001, 85 | title="Guidance Scale", 86 | field="range", 87 | hide=True, 88 | id="guidance_scale", 89 | ) 90 | strength: float = Field( 91 | 0.6, 92 | min=0.25, 93 | max=1.0, 94 | step=0.001, 95 | title="Strength", 96 | field="range", 97 | hide=True, 98 | id="strength", 99 | ) 100 | controlnet_scale: float = Field( 101 | 1.0, 102 | min=0, 103 | max=1.0, 104 | step=0.001, 105 | title="Controlnet Scale", 106 | field="range", 107 | hide=True, 108 | id="controlnet_scale", 109 | ) 110 | controlnet_start: float = Field( 111 | 0.0, 112 | min=0, 113 | max=1.0, 114 | step=0.001, 115 | title="Controlnet Start", 116 | field="range", 117 | hide=True, 118 | id="controlnet_start", 119 | ) 120 | controlnet_end: float = Field( 121 | 1.0, 122 | min=0, 123 | max=1.0, 124 | step=0.001, 125 | title="Controlnet End", 126 | field="range", 127 | hide=True, 128 | id="controlnet_end", 129 | ) 130 | blend: float = Field( 131 | 0.1, 132 | min=0.0, 133 | max=1.0, 134 | step=0.001, 135 | title="Blend", 136 | field="range", 137 | hide=True, 138 | id="blend", 139 | ) 140 | 141 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 142 | controlnet_qrcode = ControlNetModel.from_pretrained( 143 | controlnet_model, torch_dtype=torch_dtype, subfolder="v2" 144 | ).to(device) 145 | 146 | if args.safety_checker: 147 | self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained( 148 | base_model, 149 | controlnet=controlnet_qrcode, 150 | ) 151 | else: 152 | self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained( 153 | base_model, 154 | safety_checker=None, 155 | controlnet=controlnet_qrcode, 156 | ) 157 | 158 | self.control_image = Image.open("qr-code.png").convert("RGB").resize((512, 512)) 159 | 160 | self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config) 161 | self.pipe.set_progress_bar_config(disable=True) 162 | if device.type != "mps": 163 | self.pipe.unet.to(memory_format=torch.channels_last) 164 | 165 | if args.taesd: 166 | self.pipe.vae = AutoencoderTiny.from_pretrained( 167 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 168 | ).to(device) 169 | 170 | # Load LCM LoRA 171 | self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm") 172 | self.pipe.to(device=device, dtype=torch_dtype).to(device) 173 | if args.compel: 174 | self.compel_proc = Compel( 175 | tokenizer=self.pipe.tokenizer, 176 | text_encoder=self.pipe.text_encoder, 177 | truncate_long_prompts=False, 178 | ) 179 | if args.torch_compile: 180 | self.pipe.unet = torch.compile( 181 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 182 | ) 
183 | self.pipe.vae = torch.compile( 184 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 185 | ) 186 | self.pipe( 187 | prompt="warmup", 188 | image=[Image.new("RGB", (512, 512))], 189 | control_image=[Image.new("RGB", (512, 512))], 190 | ) 191 | 192 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 193 | generator = torch.manual_seed(params.seed) 194 | 195 | prompt = f"modern disney style {params.prompt}" 196 | prompt_embeds = None 197 | prompt = params.prompt 198 | if hasattr(self, "compel_proc"): 199 | prompt_embeds = self.compel_proc(prompt) 200 | prompt = None 201 | 202 | steps = params.steps 203 | strength = params.strength 204 | if int(steps * strength) < 1: 205 | steps = math.ceil(1 / max(0.10, strength)) 206 | 207 | blend_qr_image = Image.blend( 208 | params.image, self.control_image, alpha=params.blend 209 | ) 210 | results = self.pipe( 211 | image=blend_qr_image, 212 | control_image=self.control_image, 213 | prompt=prompt, 214 | prompt_embeds=prompt_embeds, 215 | generator=generator, 216 | strength=strength, 217 | num_inference_steps=steps, 218 | guidance_scale=params.guidance_scale, 219 | width=params.width, 220 | height=params.height, 221 | output_type="pil", 222 | controlnet_conditioning_scale=params.controlnet_scale, 223 | control_guidance_start=params.controlnet_start, 224 | control_guidance_end=params.controlnet_end, 225 | ) 226 | 227 | return results.images[0] 228 | -------------------------------------------------------------------------------- /server/pipelines/controlnetPCMSD15.py: -------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | StableDiffusionControlNetImg2ImgPipeline, 3 | ControlNetModel, 4 | TCDScheduler, 5 | AutoencoderTiny, 6 | ) 7 | from compel import Compel 8 | import torch 9 | from pipelines.utils.canny_gpu import SobelOperator 10 | 11 | try: 12 | import intel_extension_for_pytorch as ipex # type: ignore 13 | except: 14 | pass 15 | 16 | from config import Args 17 | from pydantic import BaseModel, Field 18 | from util import ParamsModel 19 | from PIL import Image 20 | 21 | taesd_model = "madebyollin/taesd" 22 | controlnet_model = "lllyasviel/control_v11p_sd15_canny" 23 | base_model_id = "runwayml/stable-diffusion-v1-5" 24 | pcm_base = "wangfuyun/PCM_Weights" 25 | pcm_lora_ckpts = { 26 | "2-Step": ["pcm_sd15_smallcfg_2step_converted.safetensors", 2, 0.0], 27 | "4-Step": ["pcm_sd15_smallcfg_4step_converted.safetensors", 4, 0.0], 28 | "8-Step": ["pcm_sd15_smallcfg_8step_converted.safetensors", 8, 0.0], 29 | "16-Step": ["pcm_sd15_smallcfg_16step_converted.safetensors", 16, 0.0], 30 | "Normal CFG 4-Step": ["pcm_sd15_normalcfg_4step_converted.safetensors", 4, 7.5], 31 | "Normal CFG 8-Step": ["pcm_sd15_normalcfg_8step_converted.safetensors", 8, 7.5], 32 | "Normal CFG 16-Step": ["pcm_sd15_normalcfg_16step_converted.safetensors", 16, 7.5], 33 | } 34 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece" 35 | page_content = """ 36 | 37 | """ 38 | 39 | 40 | class Pipeline: 41 | class Info(BaseModel): 42 | name: str = "controlnet+loras+sd15" 43 | title: str = "LCM + LoRA + Controlnet" 44 | description: str = "Generates an image from a text prompt" 45 | input_mode: str = "image" 46 | page_content: str = page_content 47 | 48 | class InputParams(ParamsModel): 49 | prompt: str = Field( 50 | default_prompt, 51 | 
title="Prompt", 52 | field="textarea", 53 | id="prompt", 54 | ) 55 | lora_ckpt_id: str = Field( 56 | "4-Step", 57 | title="PCM Base Model", 58 | values=list(pcm_lora_ckpts.keys()), 59 | field="select", 60 | id="lora_ckpt_id", 61 | ) 62 | seed: int = Field( 63 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 64 | ) 65 | width: int = Field( 66 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 67 | ) 68 | height: int = Field( 69 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 70 | ) 71 | strength: float = Field( 72 | 0.5, 73 | min=0.25, 74 | max=1.0, 75 | step=0.001, 76 | title="Strength", 77 | field="range", 78 | hide=True, 79 | id="strength", 80 | ) 81 | controlnet_scale: float = Field( 82 | 0.8, 83 | min=0, 84 | max=1.0, 85 | step=0.001, 86 | title="Controlnet Scale", 87 | field="range", 88 | hide=True, 89 | id="controlnet_scale", 90 | ) 91 | controlnet_start: float = Field( 92 | 0.0, 93 | min=0, 94 | max=1.0, 95 | step=0.001, 96 | title="Controlnet Start", 97 | field="range", 98 | hide=True, 99 | id="controlnet_start", 100 | ) 101 | controlnet_end: float = Field( 102 | 1.0, 103 | min=0, 104 | max=1.0, 105 | step=0.001, 106 | title="Controlnet End", 107 | field="range", 108 | hide=True, 109 | id="controlnet_end", 110 | ) 111 | canny_low_threshold: float = Field( 112 | 0.31, 113 | min=0, 114 | max=1.0, 115 | step=0.001, 116 | title="Canny Low Threshold", 117 | field="range", 118 | hide=True, 119 | id="canny_low_threshold", 120 | ) 121 | canny_high_threshold: float = Field( 122 | 0.125, 123 | min=0, 124 | max=1.0, 125 | step=0.001, 126 | title="Canny High Threshold", 127 | field="range", 128 | hide=True, 129 | id="canny_high_threshold", 130 | ) 131 | debug_canny: bool = Field( 132 | False, 133 | title="Debug Canny", 134 | field="checkbox", 135 | hide=True, 136 | id="debug_canny", 137 | ) 138 | 139 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 140 | controlnet_canny = ControlNetModel.from_pretrained( 141 | controlnet_model, torch_dtype=torch_dtype 142 | ).to(device) 143 | 144 | self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained( 145 | base_model_id, 146 | safety_checker=None, 147 | controlnet=controlnet_canny, 148 | ) 149 | 150 | self.canny_torch = SobelOperator(device=device) 151 | 152 | self.pipe.scheduler = TCDScheduler( 153 | num_train_timesteps=1000, 154 | beta_start=0.00085, 155 | beta_end=0.012, 156 | beta_schedule="scaled_linear", 157 | timestep_spacing="trailing", 158 | ) 159 | 160 | self.pipe.set_progress_bar_config(disable=True) 161 | if device.type != "mps": 162 | self.pipe.unet.to(memory_format=torch.channels_last) 163 | 164 | if args.taesd: 165 | self.pipe.vae = AutoencoderTiny.from_pretrained( 166 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 167 | ).to(device) 168 | 169 | self.loaded_lora = "4-Step" 170 | self.pipe.load_lora_weights( 171 | pcm_base, 172 | weight_name=pcm_lora_ckpts[self.loaded_lora][0], 173 | subfolder="sd15", 174 | ) 175 | self.pipe.to(device=device, dtype=torch_dtype).to(device) 176 | if args.compel: 177 | self.compel_proc = Compel( 178 | tokenizer=self.pipe.tokenizer, 179 | text_encoder=self.pipe.text_encoder, 180 | truncate_long_prompts=False, 181 | ) 182 | if args.torch_compile: 183 | self.pipe.unet = torch.compile( 184 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 185 | ) 186 | self.pipe.vae = torch.compile( 187 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 188 | ) 189 | self.pipe( 190 | 
prompt="warmup", 191 | image=[Image.new("RGB", (768, 768))], 192 | control_image=[Image.new("RGB", (768, 768))], 193 | ) 194 | 195 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 196 | generator = torch.manual_seed(params.seed) 197 | guidance_scale = pcm_lora_ckpts[params.lora_ckpt_id][2] 198 | steps = pcm_lora_ckpts[params.lora_ckpt_id][1] 199 | 200 | if self.loaded_lora != params.lora_ckpt_id: 201 | checkpoint = pcm_lora_ckpts[params.lora_ckpt_id][0] 202 | self.pipe.load_lora_weights( 203 | pcm_base, 204 | weight_name=checkpoint, 205 | subfolder="sd15", 206 | ) 207 | self.loaded_lora = params.lora_ckpt_id 208 | 209 | prompt_embeds = None 210 | prompt = params.prompt 211 | if hasattr(self, "compel_proc"): 212 | prompt_embeds = self.compel_proc(prompt) 213 | prompt = None 214 | control_image = self.canny_torch( 215 | params.image, params.canny_low_threshold, params.canny_high_threshold 216 | ) 217 | strength = params.strength 218 | 219 | results = self.pipe( 220 | image=params.image, 221 | control_image=control_image, 222 | prompt=prompt, 223 | prompt_embeds=prompt_embeds, 224 | generator=generator, 225 | strength=strength, 226 | num_inference_steps=steps, 227 | guidance_scale=guidance_scale, 228 | width=params.width, 229 | height=params.height, 230 | output_type="pil", 231 | controlnet_conditioning_scale=params.controlnet_scale, 232 | control_guidance_start=params.controlnet_start, 233 | control_guidance_end=params.controlnet_end, 234 | ) 235 | 236 | result_image = results.images[0] 237 | if params.debug_canny: 238 | # paste control_image on top of result_image 239 | w0, h0 = (200, 200) 240 | control_image = control_image.resize((w0, h0)) 241 | w1, h1 = result_image.size 242 | result_image.paste(control_image, (w1 - w0, h1 - h0)) 243 | 244 | return result_image 245 | -------------------------------------------------------------------------------- /server/pipelines/img2img.py: -------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | AutoPipelineForImage2Image, 3 | AutoencoderTiny, 4 | ) 5 | from compel import Compel 6 | import torch 7 | 8 | try: 9 | import intel_extension_for_pytorch as ipex # type: ignore 10 | except: 11 | pass 12 | 13 | import psutil 14 | from config import Args 15 | from pydantic import BaseModel, Field 16 | from PIL import Image 17 | from util import ParamsModel 18 | import math 19 | 20 | base_model = "SimianLuo/LCM_Dreamshaper_v7" 21 | taesd_model = "madebyollin/taesd" 22 | 23 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece" 24 | page_content = """ 25 |

Real-Time Latent Consistency Model

26 |

Image-to-Image LCM

27 |

28 | This demo showcases 29 | LCM 33 | Image to Image pipeline using 34 | Diffusers with an MJPEG stream server. 39 |

40 |

41 | Change the prompt to generate different images; Compel syntax is accepted. 46 |
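The description above refers to an MJPEG stream server; the actual endpoint lives in server/main.py, which is not part of this excerpt. Below is a minimal, hedged sketch of how frames produced by these pipelines could be served as an MJPEG stream with FastAPI. The helper name produce_next_frame and the route path are illustrative assumptions, not the repo's API.

```python
# Hedged sketch of an MJPEG endpoint (the repo's real implementation is in server/main.py,
# not shown here; produce_next_frame and the route are illustrative assumptions).
import io

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


def frame_generator():
    while True:
        pil_image = produce_next_frame()  # assumed: returns the latest PIL.Image from predict()
        buf = io.BytesIO()
        pil_image.save(buf, format="JPEG")
        yield (
            b"--frame\r\n"
            b"Content-Type: image/jpeg\r\n\r\n" + buf.getvalue() + b"\r\n"
        )


@app.get("/stream")
def stream():
    return StreamingResponse(
        frame_generator(),
        media_type="multipart/x-mixed-replace; boundary=frame",
    )
```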

47 | """ 48 | 49 | 50 | class Pipeline: 51 | class Info(BaseModel): 52 | name: str = "img2img" 53 | title: str = "Image-to-Image LCM" 54 | description: str = "Generates an image from a text prompt" 55 | input_mode: str = "image" 56 | page_content: str = page_content 57 | 58 | class InputParams(ParamsModel): 59 | prompt: str = Field( 60 | default_prompt, 61 | title="Prompt", 62 | field="textarea", 63 | id="prompt", 64 | ) 65 | seed: int = Field( 66 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 67 | ) 68 | steps: int = Field( 69 | 4, min=1, max=15, title="Steps", field="range", hide=True, id="steps" 70 | ) 71 | width: int = Field( 72 | 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 73 | ) 74 | height: int = Field( 75 | 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 76 | ) 77 | guidance_scale: float = Field( 78 | 0.2, 79 | min=0, 80 | max=20, 81 | step=0.001, 82 | title="Guidance Scale", 83 | field="range", 84 | hide=True, 85 | id="guidance_scale", 86 | ) 87 | strength: float = Field( 88 | 0.5, 89 | min=0.25, 90 | max=1.0, 91 | step=0.001, 92 | title="Strength", 93 | field="range", 94 | hide=True, 95 | id="strength", 96 | ) 97 | 98 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 99 | self.pipe = AutoPipelineForImage2Image.from_pretrained( 100 | base_model, 101 | safety_checker=None, 102 | ) 103 | if args.taesd: 104 | self.pipe.vae = AutoencoderTiny.from_pretrained( 105 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 106 | ).to(device) 107 | 108 | if args.sfast: 109 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 110 | compile, 111 | CompilationConfig, 112 | ) 113 | 114 | config = CompilationConfig.Default() 115 | config.enable_xformers = True 116 | config.enable_triton = True 117 | config.enable_cuda_graph = True 118 | self.pipe = compile(self.pipe, config=config) 119 | 120 | self.pipe.set_progress_bar_config(disable=True) 121 | self.pipe.to(device=device, dtype=torch_dtype) 122 | if device.type != "mps": 123 | self.pipe.unet.to(memory_format=torch.channels_last) 124 | 125 | if args.torch_compile: 126 | print("Running torch compile") 127 | self.pipe.unet = torch.compile( 128 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 129 | ) 130 | self.pipe.vae = torch.compile( 131 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 132 | ) 133 | 134 | self.pipe( 135 | prompt="warmup", 136 | image=[Image.new("RGB", (768, 768))], 137 | ) 138 | 139 | if args.compel: 140 | self.compel_proc = Compel( 141 | tokenizer=self.pipe.tokenizer, 142 | text_encoder=self.pipe.text_encoder, 143 | truncate_long_prompts=False, 144 | ) 145 | 146 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 147 | generator = torch.manual_seed(params.seed) 148 | prompt_embeds = None 149 | prompt = params.prompt 150 | if hasattr(self, "compel_proc"): 151 | prompt_embeds = self.compel_proc(params.prompt) 152 | prompt = None 153 | 154 | steps = params.steps 155 | strength = params.strength 156 | if int(steps * strength) < 1: 157 | steps = math.ceil(1 / max(0.10, strength)) 158 | 159 | results = self.pipe( 160 | image=params.image, 161 | prompt=prompt, 162 | prompt_embeds=prompt_embeds, 163 | generator=generator, 164 | strength=strength, 165 | num_inference_steps=steps, 166 | guidance_scale=params.guidance_scale, 167 | width=params.width, 168 | height=params.height, 169 | output_type="pil", 170 | ) 171 | 172 | return results.images[0] 173 | 
-------------------------------------------------------------------------------- /server/pipelines/img2imgFlux.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from optimum.quanto import freeze, qfloat8, quantize 4 | from transformers.modeling_utils import PreTrainedModel 5 | from diffusers import AutoencoderTiny 6 | from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel 7 | from diffusers.pipelines.flux.pipeline_flux_img2img import FluxImg2ImgPipeline 8 | from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast 9 | from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL 10 | 11 | 12 | from pruna import smash, SmashConfig 13 | from pruna.telemetry import set_telemetry_metrics 14 | 15 | set_telemetry_metrics(False) # disable telemetry for current session 16 | set_telemetry_metrics(False, set_as_default=True) # disable telemetry globally 17 | 18 | 19 | try: 20 | import intel_extension_for_pytorch as ipex # type: ignore 21 | except: 22 | pass 23 | 24 | import psutil 25 | from config import Args 26 | from pydantic import BaseModel, Field 27 | from PIL import Image 28 | from pathlib import Path 29 | from util import ParamsModel 30 | import math 31 | import gc 32 | 33 | 34 | # model_path = "black-forest-labs/FLUX.1-dev" 35 | model_path = "black-forest-labs/FLUX.1-schnell" 36 | base_model_path = "black-forest-labs/FLUX.1-schnell" 37 | taesd_path = "madebyollin/taef1" 38 | subfolder = "transformer" 39 | transformer_path = model_path 40 | models_path = Path("models") 41 | 42 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux" 43 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated" 44 | page_content = """ 45 |

Real-Time FLUX
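img2imgFlux.py imports freeze, qfloat8, and quantize from optimum.quanto but, in the code shown below, quantizes the FLUX transformer and T5 encoder through Pruna's smash with the "quanto" quantizer instead. For reference, the direct optimum-quanto route would look roughly like the sketch below; qfloat8 is an example weight type here, while the module below uses qint4 via Pruna.

```python
# Sketch of the direct optimum-quanto path (the module below uses pruna.smash with the
# "quanto" quantizer and qint4 weights instead; qfloat8 here is just an example choice).
import torch
from optimum.quanto import quantize, freeze, qfloat8
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel

transformer = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", subfolder="transformer", torch_dtype=torch.bfloat16
)
quantize(transformer, weights=qfloat8)  # swap Linear weights for quantized versions
freeze(transformer)                     # materialize quantized weights, drop the fp copies
```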

46 | 47 | """ 48 | 49 | 50 | def flush(): 51 | torch.cuda.empty_cache() 52 | gc.collect() 53 | 54 | 55 | class Pipeline: 56 | class Info(BaseModel): 57 | name: str = "img2img" 58 | title: str = "Image-to-Image SDXL" 59 | description: str = "Generates an image from a text prompt" 60 | input_mode: str = "image" 61 | page_content: str = page_content 62 | 63 | class InputParams(ParamsModel): 64 | prompt: str = Field( 65 | default_prompt, 66 | title="Prompt", 67 | field="textarea", 68 | id="prompt", 69 | ) 70 | seed: int = Field( 71 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 72 | ) 73 | steps: int = Field( 74 | 1, min=1, max=15, title="Steps", field="range", hide=True, id="steps" 75 | ) 76 | width: int = Field( 77 | 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 78 | ) 79 | height: int = Field( 80 | 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 81 | ) 82 | strength: float = Field( 83 | 0.5, 84 | min=0.25, 85 | max=1.0, 86 | step=0.001, 87 | title="Strength", 88 | field="range", 89 | hide=True, 90 | id="strength", 91 | ) 92 | guidance: float = Field( 93 | 3.5, 94 | min=0, 95 | max=20, 96 | step=0.001, 97 | title="Guidance", 98 | hide=True, 99 | field="range", 100 | id="guidance", 101 | ) 102 | 103 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 104 | # ckpt_path = ( 105 | # "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf" 106 | # ) 107 | print("Loading model") 108 | 109 | model_id = "black-forest-labs/FLUX.1-schnell" 110 | model_revision = "refs/pr/1" 111 | text_model_id = "openai/clip-vit-large-patch14" 112 | model_data_type = torch.bfloat16 113 | tokenizer = CLIPTokenizer.from_pretrained( 114 | text_model_id, torch_dtype=model_data_type 115 | ) 116 | text_encoder = CLIPTextModel.from_pretrained( 117 | text_model_id, torch_dtype=model_data_type 118 | ) 119 | 120 | # 2 121 | tokenizer_2 = T5TokenizerFast.from_pretrained( 122 | model_id, 123 | subfolder="tokenizer_2", 124 | torch_dtype=model_data_type, 125 | revision=model_revision, 126 | ) 127 | text_encoder_2 = T5EncoderModel.from_pretrained( 128 | model_id, 129 | subfolder="text_encoder_2", 130 | torch_dtype=model_data_type, 131 | revision=model_revision, 132 | ) 133 | 134 | # Transformers 135 | scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained( 136 | model_id, subfolder="scheduler", revision=model_revision 137 | ) 138 | transformer = FluxTransformer2DModel.from_pretrained( 139 | model_id, 140 | subfolder="transformer", 141 | torch_dtype=model_data_type, 142 | revision=model_revision, 143 | ) 144 | 145 | # VAE 146 | # vae = AutoencoderKL.from_pretrained( 147 | # model_id, 148 | # subfolder="vae", 149 | # torch_dtype=model_data_type, 150 | # revision=model_revision, 151 | # ) 152 | 153 | vae = AutoencoderTiny.from_pretrained( 154 | "madebyollin/taef1", torch_dtype=torch.bfloat16 155 | ) 156 | 157 | # Initialize the SmashConfig 158 | smash_config = SmashConfig() 159 | smash_config["quantizer"] = "quanto" 160 | smash_config["quanto_calibrate"] = False 161 | smash_config["quanto_weight_bits"] = "qint4" 162 | # ( 163 | # "qint4" # "qfloat8" # or "qint2", "qint4", "qint8" 164 | # ) 165 | 166 | transformer = smash( 167 | model=transformer, 168 | smash_config=smash_config, 169 | ) 170 | text_encoder_2 = smash( 171 | model=text_encoder_2, 172 | smash_config=smash_config, 173 | ) 174 | 175 | pipe = FluxImg2ImgPipeline( 176 | scheduler=scheduler, 177 | text_encoder=text_encoder, 178 | tokenizer=tokenizer, 
179 | text_encoder_2=text_encoder_2, 180 | tokenizer_2=tokenizer_2, 181 | vae=vae, 182 | transformer=transformer, 183 | ) 184 | 185 | # if args.taesd: 186 | # pipe.vae = AutoencoderTiny.from_pretrained( 187 | # taesd_path, torch_dtype=torch.bfloat16, use_safetensors=True 188 | # ) 189 | # pipe.enable_model_cpu_offload() 190 | pipe.text_encoder.to(device) 191 | pipe.vae.to(device) 192 | pipe.transformer.to(device) 193 | pipe.text_encoder_2.to(device) 194 | 195 | # pipe.enable_model_cpu_offload() 196 | # For added memory savings run this block, there is however a trade-off with speed. 197 | # vae.enable_tiling() 198 | # vae.enable_slicing() 199 | # pipe.enable_sequential_cpu_offload() 200 | 201 | self.pipe = pipe 202 | self.pipe.set_progress_bar_config(disable=True) 203 | # vae = AutoencoderKL.from_pretrained( 204 | # base_model_path, subfolder="vae", torch_dtype=torch_dtype 205 | # ) 206 | 207 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 208 | generator = torch.manual_seed(params.seed) 209 | steps = params.steps 210 | strength = params.strength 211 | prompt = params.prompt 212 | guidance = params.guidance 213 | 214 | results = self.pipe( 215 | image=params.image, 216 | prompt=prompt, 217 | generator=generator, 218 | strength=strength, 219 | num_inference_steps=steps, 220 | guidance_scale=guidance, 221 | width=params.width, 222 | height=params.height, 223 | ) 224 | return results.images[0] 225 | -------------------------------------------------------------------------------- /server/pipelines/img2imgSDTurbo.py: -------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | AutoPipelineForImage2Image, 3 | AutoencoderTiny, 4 | ) 5 | import torch 6 | 7 | 8 | from config import Args 9 | from pydantic import BaseModel, Field 10 | from PIL import Image 11 | from util import ParamsModel 12 | import math 13 | 14 | from pruna import smash, SmashConfig 15 | 16 | base_model = "stabilityai/sd-turbo" 17 | taesd_model = "madebyollin/taesd" 18 | 19 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux" 20 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated" 21 | page_content = """ 22 |

Real-Time SD-Turbo

23 |

Image-to-Image

24 |

25 | This demo showcases 26 | SD-Turbo 30 | Image to Image pipeline using 31 | Diffusers with an MJPEG stream server. 36 |

37 |

38 | Change the prompt to generate different images; Compel syntax is accepted. 43 |

44 | """ 45 | 46 | 47 | class Pipeline: 48 | class Info(BaseModel): 49 | name: str = "img2img" 50 | title: str = "Image-to-Image SDXL" 51 | description: str = "Generates an image from a text prompt" 52 | input_mode: str = "image" 53 | page_content: str = page_content 54 | 55 | class InputParams(ParamsModel): 56 | prompt: str = Field( 57 | default_prompt, 58 | title="Prompt", 59 | field="textarea", 60 | id="prompt", 61 | ) 62 | negative_prompt: str = Field( 63 | default_negative_prompt, 64 | title="Negative Prompt", 65 | field="textarea", 66 | id="negative_prompt", 67 | hide=True, 68 | ) 69 | seed: int = Field( 70 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 71 | ) 72 | steps: int = Field( 73 | 1, min=1, max=15, title="Steps", field="range", hide=True, id="steps" 74 | ) 75 | width: int = Field( 76 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 77 | ) 78 | height: int = Field( 79 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 80 | ) 81 | strength: float = Field( 82 | 0.5, 83 | min=0.25, 84 | max=1.0, 85 | step=0.001, 86 | title="Strength", 87 | field="range", 88 | hide=True, 89 | id="strength", 90 | ) 91 | 92 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 93 | self.pipe = AutoPipelineForImage2Image.from_pretrained( 94 | base_model, 95 | safety_checker=None, 96 | ) 97 | if args.taesd: 98 | self.pipe.vae = AutoencoderTiny.from_pretrained( 99 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 100 | ).to(device) 101 | 102 | if args.pruna: 103 | # Create and smash your model 104 | smash_config = SmashConfig() 105 | # smash_config["cacher"] = "deepcache" 106 | smash_config["compiler"] = "stable_fast" 107 | self.pipe = smash(model=self.pipe, smash_config=smash_config) 108 | 109 | self.pipe.set_progress_bar_config(disable=True) 110 | self.pipe.to(device=device, dtype=torch_dtype) 111 | # if device.type != "mps": 112 | # self.pipe.unet.to(memory_format=torch.channels_last) 113 | 114 | if args.torch_compile: 115 | print("Running torch compile") 116 | self.pipe.unet = torch.compile( 117 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 118 | ) 119 | self.pipe.vae = torch.compile( 120 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 121 | ) 122 | 123 | self.pipe( 124 | prompt="warmup", 125 | image=[Image.new("RGB", (768, 768))], 126 | ) 127 | if args.compel: 128 | from compel import Compel 129 | 130 | self.pipe.compel_proc = Compel( 131 | tokenizer=self.pipe.tokenizer, 132 | text_encoder=self.pipe.text_encoder, 133 | truncate_long_prompts=True, 134 | ) 135 | 136 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 137 | generator = torch.manual_seed(params.seed) 138 | steps = params.steps 139 | strength = params.strength 140 | if int(steps * strength) < 1: 141 | steps = math.ceil(1 / max(0.10, strength)) 142 | 143 | prompt = params.prompt 144 | prompt_embeds = None 145 | if hasattr(self.pipe, "compel_proc"): 146 | prompt_embeds = self.pipe.compel_proc( 147 | [params.prompt, params.negative_prompt] 148 | ) 149 | prompt = None 150 | 151 | results = self.pipe( 152 | image=params.image, 153 | prompt_embeds=prompt_embeds, 154 | prompt=prompt, 155 | negative_prompt=params.negative_prompt, 156 | generator=generator, 157 | strength=strength, 158 | num_inference_steps=steps, 159 | guidance_scale=1.1, 160 | width=params.width, 161 | height=params.height, 162 | output_type="pil", 163 | ) 164 | 165 | return results.images[0] 166 | 
-------------------------------------------------------------------------------- /server/pipelines/img2imgSDXL-Lightning.py: -------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | AutoPipelineForImage2Image, 3 | AutoencoderTiny, 4 | AutoencoderKL, 5 | UNet2DConditionModel, 6 | EulerDiscreteScheduler, 7 | ) 8 | from compel import Compel, ReturnedEmbeddingsType 9 | import torch 10 | 11 | try: 12 | import intel_extension_for_pytorch as ipex # type: ignore 13 | except: 14 | pass 15 | 16 | from safetensors.torch import load_file 17 | from huggingface_hub import hf_hub_download 18 | from config import Args 19 | from pydantic import BaseModel, Field 20 | from PIL import Image 21 | from util import ParamsModel 22 | import math 23 | from pruna import SmashConfig, smash 24 | 25 | base = "stabilityai/stable-diffusion-xl-base-1.0" 26 | repo = "ByteDance/SDXL-Lightning" 27 | ckpt = "sdxl_lightning_2step_unet.safetensors" 28 | taesd_model = "madebyollin/taesdxl" 29 | NUM_STEPS = 2 30 | 31 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux" 32 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated" 33 | page_content = """ 34 |

Real-Time SDXL Lightning

35 |

Image-to-Image

36 |

37 | This demo showcases 38 | SDXL Lightning 42 | Image to Image pipeline using 43 | Diffusers with an MJPEG stream server. 48 |

49 |

50 | Change the prompt to generate different images; Compel syntax is accepted. 55 |

56 | """ 57 | 58 | 59 | class Pipeline: 60 | class Info(BaseModel): 61 | name: str = "img2img" 62 | title: str = "Image-to-Image SDXL-Lightning" 63 | description: str = "Generates an image from a text prompt" 64 | input_mode: str = "image" 65 | page_content: str = page_content 66 | 67 | class InputParams(ParamsModel): 68 | prompt: str = Field( 69 | default_prompt, 70 | title="Prompt", 71 | field="textarea", 72 | id="prompt", 73 | ) 74 | negative_prompt: str = Field( 75 | default_negative_prompt, 76 | title="Negative Prompt", 77 | field="textarea", 78 | id="negative_prompt", 79 | hide=True, 80 | ) 81 | seed: int = Field( 82 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 83 | ) 84 | steps: int = Field( 85 | 1, min=1, max=10, title="Steps", field="range", hide=True, id="steps" 86 | ) 87 | width: int = Field( 88 | 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 89 | ) 90 | height: int = Field( 91 | 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 92 | ) 93 | guidance_scale: float = Field( 94 | 0.0, 95 | min=0, 96 | max=1, 97 | step=0.001, 98 | title="Guidance Scale", 99 | field="range", 100 | hide=True, 101 | id="guidance_scale", 102 | ) 103 | strength: float = Field( 104 | 0.5, 105 | min=0.25, 106 | max=1.0, 107 | step=0.001, 108 | title="Strength", 109 | field="range", 110 | hide=True, 111 | id="strength", 112 | ) 113 | 114 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 115 | if args.taesd: 116 | vae = AutoencoderTiny.from_pretrained( 117 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 118 | ) 119 | else: 120 | vae = AutoencoderKL.from_pretrained( 121 | "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype 122 | ) 123 | 124 | unet = UNet2DConditionModel.from_config(base, subfolder="unet") 125 | unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device.type)) 126 | self.pipe = AutoPipelineForImage2Image.from_pretrained( 127 | base, 128 | unet=unet, 129 | torch_dtype=torch_dtype, 130 | variant="fp16", 131 | safety_checker=False, 132 | vae=vae, 133 | ) 134 | # Ensure sampler uses "trailing" timesteps. 
135 | self.pipe.scheduler = EulerDiscreteScheduler.from_config( 136 | self.pipe.scheduler.config, timestep_spacing="trailing" 137 | ) 138 | 139 | if args.pruna: 140 | # Create and smash your model 141 | smash_config = SmashConfig() 142 | smash_config["cacher"] = "deepcache" 143 | smash_config["compiler"] = "stable_fast" 144 | self.pipe = smash(model=self.pipe, smash_config=smash_config) 145 | 146 | if args.sfast: 147 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 148 | compile, 149 | CompilationConfig, 150 | ) 151 | 152 | config = CompilationConfig.Default() 153 | config.enable_xformers = True 154 | config.enable_triton = True 155 | config.enable_cuda_graph = True 156 | self.pipe = compile(self.pipe, config=config) 157 | 158 | self.pipe.set_progress_bar_config(disable=True) 159 | self.pipe.to(device=device, dtype=torch_dtype) 160 | if device.type != "mps": 161 | self.pipe.unet.to(memory_format=torch.channels_last) 162 | 163 | if args.torch_compile: 164 | print("Running torch compile") 165 | self.pipe.unet = torch.compile( 166 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 167 | ) 168 | self.pipe.vae = torch.compile( 169 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 170 | ) 171 | self.pipe( 172 | prompt="warmup", 173 | image=[Image.new("RGB", (768, 768))], 174 | ) 175 | 176 | if args.compel: 177 | self.pipe.compel_proc = Compel( 178 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2], 179 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], 180 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, 181 | requires_pooled=[False, True], 182 | ) 183 | 184 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 185 | generator = torch.manual_seed(params.seed) 186 | prompt = params.prompt 187 | negative_prompt = params.negative_prompt 188 | prompt_embeds = None 189 | pooled_prompt_embeds = None 190 | negative_prompt_embeds = None 191 | negative_pooled_prompt_embeds = None 192 | if hasattr(self.pipe, "compel_proc"): 193 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc( 194 | [params.prompt, params.negative_prompt] 195 | ) 196 | prompt = None 197 | negative_prompt = None 198 | prompt_embeds = _prompt_embeds[0:1] 199 | pooled_prompt_embeds = pooled_prompt_embeds[0:1] 200 | negative_prompt_embeds = _prompt_embeds[1:2] 201 | negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2] 202 | 203 | steps = params.steps 204 | strength = params.strength 205 | if int(steps * strength) < 1: 206 | steps = math.ceil(1 / max(0.10, strength)) 207 | 208 | results = self.pipe( 209 | image=params.image, 210 | prompt=prompt, 211 | negative_prompt=negative_prompt, 212 | prompt_embeds=prompt_embeds, 213 | pooled_prompt_embeds=pooled_prompt_embeds, 214 | negative_prompt_embeds=negative_prompt_embeds, 215 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, 216 | generator=generator, 217 | strength=strength, 218 | num_inference_steps=steps, 219 | guidance_scale=params.guidance_scale, 220 | width=params.width, 221 | height=params.height, 222 | output_type="pil", 223 | ) 224 | 225 | return results.images[0] 226 | -------------------------------------------------------------------------------- /server/pipelines/img2imgSDXLTurbo.py: -------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | AutoPipelineForImage2Image, 3 | AutoencoderTiny, 4 | ) 5 | from compel import Compel, ReturnedEmbeddingsType 6 | import torch 7 | 8 | try: 9 | import 
intel_extension_for_pytorch as ipex # type: ignore 10 | except: 11 | pass 12 | 13 | import psutil 14 | from config import Args 15 | from pydantic import BaseModel, Field 16 | from PIL import Image 17 | from util import ParamsModel 18 | import math 19 | 20 | from pruna import smash, SmashConfig 21 | from pruna.telemetry import set_telemetry_metrics 22 | 23 | set_telemetry_metrics(False) # disable telemetry for current session 24 | set_telemetry_metrics(False, set_as_default=True) # disable telemetry globally 25 | 26 | 27 | base_model = "stabilityai/sdxl-turbo" 28 | taesd_model = "madebyollin/taesdxl" 29 | 30 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux" 31 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated" 32 | page_content = """ 33 |

Real-Time SDXL Turbo

34 |

Image-to-Image

35 |

36 | This demo showcases 37 | SDXL Turbo 41 | Image to Image pipeline using 42 | Diffusers with an MJPEG stream server. 47 |

48 |

49 | Change the prompt to generate different images; Compel syntax is accepted. 54 |

55 | """ 56 | 57 | 58 | class Pipeline: 59 | class Info(BaseModel): 60 | name: str = "img2img" 61 | title: str = "Image-to-Image SDXL" 62 | description: str = "Generates an image from a text prompt" 63 | input_mode: str = "image" 64 | page_content: str = page_content 65 | 66 | class InputParams(ParamsModel): 67 | prompt: str = Field( 68 | default_prompt, 69 | title="Prompt", 70 | field="textarea", 71 | id="prompt", 72 | ) 73 | negative_prompt: str = Field( 74 | default_negative_prompt, 75 | title="Negative Prompt", 76 | field="textarea", 77 | id="negative_prompt", 78 | hide=True, 79 | ) 80 | seed: int = Field( 81 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 82 | ) 83 | steps: int = Field( 84 | 1, min=1, max=10, title="Steps", field="range", hide=True, id="steps" 85 | ) 86 | width: int = Field( 87 | 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 88 | ) 89 | height: int = Field( 90 | 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 91 | ) 92 | guidance_scale: float = Field( 93 | 1.0, 94 | min=0, 95 | max=1, 96 | step=0.001, 97 | title="Guidance Scale", 98 | field="range", 99 | hide=True, 100 | id="guidance_scale", 101 | ) 102 | strength: float = Field( 103 | 0.5, 104 | min=0.25, 105 | max=1.0, 106 | step=0.001, 107 | title="Strength", 108 | field="range", 109 | hide=True, 110 | id="strength", 111 | ) 112 | 113 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 114 | base_pipe = AutoPipelineForImage2Image.from_pretrained( 115 | base_model, 116 | safety_checker=None, 117 | ) 118 | self.pipe = None 119 | if args.taesd: 120 | self.pipe.vae = AutoencoderTiny.from_pretrained( 121 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 122 | ).to(device) 123 | 124 | if args.sfast: 125 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 126 | compile, 127 | CompilationConfig, 128 | ) 129 | 130 | config = CompilationConfig.Default() 131 | config.enable_xformers = True 132 | config.enable_triton = True 133 | config.enable_cuda_graph = True 134 | self.pipe = compile(self.pipe, config=config) 135 | 136 | if device.type != "mps": 137 | self.pipe.unet.to(memory_format=torch.channels_last) 138 | 139 | if args.pruna: 140 | # Create and smash your model 141 | smash_config = SmashConfig() 142 | smash_config["cacher"] = "deepcache" 143 | smash_config["compiler"] = "stable_fast" 144 | self.pipe = smash(model=base_pipe, smash_config=smash_config) 145 | 146 | if args.torch_compile: 147 | print("Running torch compile") 148 | self.pipe.unet = torch.compile( 149 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 150 | ) 151 | self.pipe.vae = torch.compile( 152 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 153 | ) 154 | self.pipe( 155 | prompt="warmup", 156 | image=[Image.new("RGB", (768, 768))], 157 | ) 158 | 159 | if args.compel: 160 | self.pipe.compel_proc = Compel( 161 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2], 162 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], 163 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, 164 | requires_pooled=[False, True], 165 | ) 166 | 167 | self.pipe.set_progress_bar_config(disable=True) 168 | self.pipe.to(device=device, dtype=torch_dtype) 169 | 170 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 171 | generator = torch.manual_seed(params.seed) 172 | prompt = params.prompt 173 | negative_prompt = params.negative_prompt 174 | prompt_embeds = None 175 | 
pooled_prompt_embeds = None 176 | negative_prompt_embeds = None 177 | negative_pooled_prompt_embeds = None 178 | if hasattr(self.pipe, "compel_proc"): 179 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc( 180 | [params.prompt, params.negative_prompt] 181 | ) 182 | prompt = None 183 | negative_prompt = None 184 | prompt_embeds = _prompt_embeds[0:1] 185 | pooled_prompt_embeds = pooled_prompt_embeds[0:1] 186 | negative_prompt_embeds = _prompt_embeds[1:2] 187 | negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2] 188 | 189 | steps = params.steps 190 | strength = params.strength 191 | if int(steps * strength) < 1: 192 | steps = math.ceil(1 / max(0.10, strength)) 193 | 194 | results = self.pipe( 195 | image=params.image, 196 | prompt=prompt, 197 | negative_prompt=negative_prompt, 198 | prompt_embeds=prompt_embeds, 199 | pooled_prompt_embeds=pooled_prompt_embeds, 200 | negative_prompt_embeds=negative_prompt_embeds, 201 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, 202 | generator=generator, 203 | strength=strength, 204 | num_inference_steps=steps, 205 | guidance_scale=params.guidance_scale, 206 | width=params.width, 207 | height=params.height, 208 | output_type="pil", 209 | ) 210 | 211 | return results.images[0] 212 | -------------------------------------------------------------------------------- /server/pipelines/img2imgSDXS512.py: -------------------------------------------------------------------------------- 1 | from diffusers import AutoPipelineForImage2Image, AutoencoderTiny 2 | from compel import Compel 3 | import torch 4 | 5 | try: 6 | import intel_extension_for_pytorch as ipex # type: ignore 7 | except: 8 | pass 9 | 10 | import psutil 11 | from config import Args 12 | from pydantic import BaseModel, Field 13 | from PIL import Image 14 | from util import ParamsModel 15 | import math 16 | 17 | base_model = "IDKiro/sdxs-512-0.9" 18 | taesd_model = "madebyollin/taesd" 19 | 20 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece" 21 | page_content = """ 22 |

Real-Time Latent SDXS 23 | Image-to-Image SDXS 24 | 25 | This demo showcases 26 | LCM 30 | Image to Image pipeline using 31 | Diffusers with a MJPEG stream server. 36 | 37 | 38 | Change the prompt to generate different images, accepts Compel syntax. 43 |
44 | """ 45 | 46 | 47 | class Pipeline: 48 | class Info(BaseModel): 49 | name: str = "img2img" 50 | title: str = "Image-to-Image SDXS" 51 | description: str = "Generates an image from a text prompt" 52 | input_mode: str = "image" 53 | page_content: str = page_content 54 | 55 | class InputParams(ParamsModel): 56 | prompt: str = Field( 57 | default_prompt, 58 | title="Prompt", 59 | field="textarea", 60 | id="prompt", 61 | ) 62 | seed: int = Field( 63 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 64 | ) 65 | steps: int = Field( 66 | 1, min=1, max=15, title="Steps", field="range", hide=True, id="steps" 67 | ) 68 | width: int = Field( 69 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 70 | ) 71 | height: int = Field( 72 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 73 | ) 74 | guidance_scale: float = Field( 75 | 0.0, 76 | min=0, 77 | max=20, 78 | step=0.001, 79 | title="Guidance Scale", 80 | field="range", 81 | hide=True, 82 | id="guidance_scale", 83 | ) 84 | strength: float = Field( 85 | 0.5, 86 | min=0.25, 87 | max=1.0, 88 | step=0.001, 89 | title="Strength", 90 | field="range", 91 | hide=True, 92 | id="strength", 93 | ) 94 | 95 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 96 | self.pipe = AutoPipelineForImage2Image.from_pretrained( 97 | base_model, 98 | safety_checker=None, 99 | ) 100 | if args.taesd: 101 | self.pipe.vae = AutoencoderTiny.from_pretrained( 102 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 103 | ).to(device) 104 | 105 | if args.sfast: 106 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 107 | compile, 108 | CompilationConfig, 109 | ) 110 | 111 | config = CompilationConfig.Default() 112 | config.enable_xformers = True 113 | config.enable_triton = True 114 | config.enable_cuda_graph = True 115 | self.pipe = compile(self.pipe, config=config) 116 | 117 | self.pipe.set_progress_bar_config(disable=True) 118 | self.pipe.to(device=device, dtype=torch_dtype) 119 | if device.type != "mps": 120 | self.pipe.unet.to(memory_format=torch.channels_last) 121 | 122 | if args.torch_compile: 123 | print("Running torch compile") 124 | self.pipe.unet = torch.compile( 125 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 126 | ) 127 | self.pipe.vae = torch.compile( 128 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 129 | ) 130 | 131 | self.pipe( 132 | prompt="warmup", 133 | image=[Image.new("RGB", (768, 768))], 134 | ) 135 | 136 | if args.compel: 137 | self.compel_proc = Compel( 138 | tokenizer=self.pipe.tokenizer, 139 | text_encoder=self.pipe.text_encoder, 140 | truncate_long_prompts=False, 141 | ) 142 | 143 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 144 | generator = torch.manual_seed(params.seed) 145 | prompt_embeds = None 146 | prompt = params.prompt 147 | if hasattr(self, "compel_proc"): 148 | prompt_embeds = self.compel_proc(params.prompt) 149 | prompt = None 150 | 151 | results = self.pipe( 152 | image=params.image, 153 | prompt=prompt, 154 | prompt_embeds=prompt_embeds, 155 | generator=generator, 156 | strength=params.strength, 157 | num_inference_steps=params.steps, 158 | guidance_scale=params.guidance_scale, 159 | width=params.width, 160 | height=params.height, 161 | output_type="pil", 162 | ) 163 | return results.images[0] 164 | -------------------------------------------------------------------------------- /server/pipelines/img2imgSegmindVegaRT.py: 
-------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | AutoPipelineForImage2Image, 3 | LCMScheduler, 4 | AutoencoderTiny, 5 | ) 6 | from compel import Compel, ReturnedEmbeddingsType 7 | import torch 8 | 9 | try: 10 | import intel_extension_for_pytorch as ipex # type: ignore 11 | except: 12 | pass 13 | 14 | import psutil 15 | from config import Args 16 | from pydantic import BaseModel, Field 17 | from util import ParamsModel 18 | from PIL import Image 19 | import math 20 | 21 | base_model = "segmind/Segmind-Vega" 22 | lora_model = "segmind/Segmind-VegaRT" 23 | taesd_model = "madebyollin/taesdxl" 24 | 25 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux" 26 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated" 27 | page_content = """ 28 |

Real-Time SegmindVegaRT 29 | Image-to-Image 30 | 31 | This demo showcases 32 | SegmindVegaRT 36 | Image to Image pipeline using 37 | Diffusers with a MJPEG stream server. 42 | 43 | 44 | Change the prompt to generate different images, accepts Compel syntax. 49 |
50 | """ 51 | 52 | 53 | class Pipeline: 54 | class Info(BaseModel): 55 | name: str = "img2img" 56 | title: str = "Image-to-Image Playground 256" 57 | description: str = "Generates an image from a text prompt" 58 | input_mode: str = "image" 59 | page_content: str = page_content 60 | 61 | class InputParams(ParamsModel): 62 | prompt: str = Field( 63 | default_prompt, 64 | title="Prompt", 65 | field="textarea", 66 | id="prompt", 67 | ) 68 | negative_prompt: str = Field( 69 | default_negative_prompt, 70 | title="Negative Prompt", 71 | field="textarea", 72 | id="negative_prompt", 73 | hide=True, 74 | ) 75 | seed: int = Field( 76 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 77 | ) 78 | steps: int = Field( 79 | 1, min=1, max=10, title="Steps", field="range", hide=True, id="steps" 80 | ) 81 | width: int = Field( 82 | 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 83 | ) 84 | height: int = Field( 85 | 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 86 | ) 87 | guidance_scale: float = Field( 88 | 0.0, 89 | min=0, 90 | max=1, 91 | step=0.001, 92 | title="Guidance Scale", 93 | field="range", 94 | hide=True, 95 | id="guidance_scale", 96 | ) 97 | strength: float = Field( 98 | 0.5, 99 | min=0.25, 100 | max=1.0, 101 | step=0.001, 102 | title="Strength", 103 | field="range", 104 | hide=True, 105 | id="strength", 106 | ) 107 | 108 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 109 | self.pipe = AutoPipelineForImage2Image.from_pretrained( 110 | base_model, 111 | safety_checker=None, 112 | variant="fp16", 113 | ) 114 | if args.taesd: 115 | self.pipe.vae = AutoencoderTiny.from_pretrained( 116 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 117 | ).to(device) 118 | 119 | self.pipe.load_lora_weights(lora_model) 120 | self.pipe.fuse_lora() 121 | self.pipe.scheduler = LCMScheduler.from_pretrained( 122 | base_model, subfolder="scheduler" 123 | ) 124 | if args.sfast: 125 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 126 | compile, 127 | CompilationConfig, 128 | ) 129 | 130 | config = CompilationConfig.Default() 131 | config.enable_xformers = True 132 | config.enable_triton = True 133 | config.enable_cuda_graph = True 134 | self.pipe = compile(self.pipe, config=config) 135 | 136 | self.pipe.set_progress_bar_config(disable=True) 137 | self.pipe.to(device=device, dtype=torch_dtype) 138 | if device.type != "mps": 139 | self.pipe.unet.to(memory_format=torch.channels_last) 140 | 141 | if args.torch_compile: 142 | print("Running torch compile") 143 | self.pipe.unet = torch.compile( 144 | self.pipe.unet, mode="reduce-overhead", fullgraph=False 145 | ) 146 | self.pipe.vae = torch.compile( 147 | self.pipe.vae, mode="reduce-overhead", fullgraph=False 148 | ) 149 | 150 | self.pipe( 151 | prompt="warmup", 152 | image=[Image.new("RGB", (768, 768))], 153 | ) 154 | if args.compel: 155 | self.pipe.compel_proc = Compel( 156 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2], 157 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], 158 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, 159 | requires_pooled=[False, True], 160 | ) 161 | 162 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 163 | generator = torch.manual_seed(params.seed) 164 | prompt = params.prompt 165 | negative_prompt = params.negative_prompt 166 | prompt_embeds = None 167 | pooled_prompt_embeds = None 168 | negative_prompt_embeds = None 169 | 
negative_pooled_prompt_embeds = None 170 | if hasattr(self.pipe, "compel_proc"): 171 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc( 172 | [params.prompt, params.negative_prompt] 173 | ) 174 | prompt = None 175 | negative_prompt = None 176 | prompt_embeds = _prompt_embeds[0:1] 177 | pooled_prompt_embeds = pooled_prompt_embeds[0:1] 178 | negative_prompt_embeds = _prompt_embeds[1:2] 179 | negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2] 180 | 181 | steps = params.steps 182 | strength = params.strength 183 | if int(steps * strength) < 1: 184 | steps = math.ceil(1 / max(0.10, strength)) 185 | 186 | results = self.pipe( 187 | image=params.image, 188 | prompt=prompt, 189 | negative_prompt=negative_prompt, 190 | prompt_embeds=prompt_embeds, 191 | pooled_prompt_embeds=pooled_prompt_embeds, 192 | negative_prompt_embeds=negative_prompt_embeds, 193 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, 194 | generator=generator, 195 | strength=strength, 196 | num_inference_steps=steps, 197 | guidance_scale=params.guidance_scale, 198 | width=params.width, 199 | height=params.height, 200 | output_type="pil", 201 | ) 202 | 203 | return results.images[0] 204 | -------------------------------------------------------------------------------- /server/pipelines/pix2pix/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/server/pipelines/pix2pix/__init__.py -------------------------------------------------------------------------------- /server/pipelines/pix2pix/model.py: -------------------------------------------------------------------------------- 1 | # https://github.com/GaParmar/img2img-turbo/blob/main/src/model.py 2 | from diffusers import DDPMScheduler 3 | 4 | 5 | def make_1step_sched(): 6 | noise_scheduler_1step = DDPMScheduler.from_pretrained( 7 | "stabilityai/sd-turbo", subfolder="scheduler" 8 | ) 9 | noise_scheduler_1step.set_timesteps(1, device="cuda") 10 | noise_scheduler_1step.alphas_cumprod = noise_scheduler_1step.alphas_cumprod.cuda() 11 | return noise_scheduler_1step 12 | 13 | 14 | def my_vae_encoder_fwd(self, sample): 15 | sample = self.conv_in(sample) 16 | l_blocks = [] 17 | # down 18 | for down_block in self.down_blocks: 19 | l_blocks.append(sample) 20 | sample = down_block(sample) 21 | # middle 22 | sample = self.mid_block(sample) 23 | sample = self.conv_norm_out(sample) 24 | sample = self.conv_act(sample) 25 | sample = self.conv_out(sample) 26 | self.current_down_blocks = l_blocks 27 | return sample 28 | 29 | 30 | def my_vae_decoder_fwd(self, sample, latent_embeds=None): 31 | sample = self.conv_in(sample) 32 | upscale_dtype = next(iter(self.up_blocks.parameters())).dtype 33 | # middle 34 | sample = self.mid_block(sample, latent_embeds) 35 | sample = sample.to(upscale_dtype) 36 | if not self.ignore_skip: 37 | skip_convs = [ 38 | self.skip_conv_1, 39 | self.skip_conv_2, 40 | self.skip_conv_3, 41 | self.skip_conv_4, 42 | ] 43 | # up 44 | for idx, up_block in enumerate(self.up_blocks): 45 | skip_in = skip_convs[idx](self.incoming_skip_acts[::-1][idx] * self.gamma) 46 | # add skip 47 | sample = sample + skip_in 48 | sample = up_block(sample, latent_embeds) 49 | else: 50 | for idx, up_block in enumerate(self.up_blocks): 51 | sample = up_block(sample, latent_embeds) 52 | # post-process 53 | if latent_embeds is None: 54 | sample = self.conv_norm_out(sample) 55 | else: 56 | sample = 
self.conv_norm_out(sample, latent_embeds) 57 | sample = self.conv_act(sample) 58 | sample = self.conv_out(sample) 59 | return sample 60 | -------------------------------------------------------------------------------- /server/pipelines/pix2pix/pix2pix_turbo.py: -------------------------------------------------------------------------------- 1 | # https://github.com/GaParmar/img2img-turbo/blob/main/src/pix2pix_turbo.py 2 | import os 3 | import requests 4 | import sys 5 | import pdb 6 | import copy 7 | from tqdm import tqdm 8 | import torch 9 | from transformers import AutoTokenizer, PretrainedConfig, CLIPTextModel 10 | from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler 11 | from diffusers.utils.peft_utils import set_weights_and_activate_adapters 12 | from peft import LoraConfig 13 | 14 | from pipelines.pix2pix.model import ( 15 | make_1step_sched, 16 | my_vae_encoder_fwd, 17 | my_vae_decoder_fwd, 18 | ) 19 | 20 | 21 | class TwinConv(torch.nn.Module): 22 | def __init__(self, convin_pretrained, convin_curr): 23 | super(TwinConv, self).__init__() 24 | self.conv_in_pretrained = copy.deepcopy(convin_pretrained) 25 | self.conv_in_curr = copy.deepcopy(convin_curr) 26 | self.r = None 27 | 28 | def forward(self, x): 29 | x1 = self.conv_in_pretrained(x).detach() 30 | x2 = self.conv_in_curr(x) 31 | return x1 * (1 - self.r) + x2 * (self.r) 32 | 33 | 34 | class Pix2Pix_Turbo(torch.nn.Module): 35 | def __init__(self, name, ckpt_folder="checkpoints"): 36 | super().__init__() 37 | self.tokenizer = AutoTokenizer.from_pretrained( 38 | "stabilityai/sd-turbo", subfolder="tokenizer" 39 | ) 40 | self.text_encoder = CLIPTextModel.from_pretrained( 41 | "stabilityai/sd-turbo", subfolder="text_encoder" 42 | ).cuda() 43 | self.sched = make_1step_sched() 44 | 45 | vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae") 46 | unet = UNet2DConditionModel.from_pretrained( 47 | "stabilityai/sd-turbo", subfolder="unet" 48 | ) 49 | 50 | if name == "edge_to_image": 51 | url = "https://www.cs.cmu.edu/~img2img-turbo/models/edge_to_image_loras.pkl" 52 | os.makedirs(ckpt_folder, exist_ok=True) 53 | outf = os.path.join(ckpt_folder, "edge_to_image_loras.pkl") 54 | if not os.path.exists(outf): 55 | print(f"Downloading checkpoint to {outf}") 56 | response = requests.get(url, stream=True) 57 | total_size_in_bytes = int(response.headers.get("content-length", 0)) 58 | block_size = 1024 # 1 Kibibyte 59 | progress_bar = tqdm( 60 | total=total_size_in_bytes, unit="iB", unit_scale=True 61 | ) 62 | with open(outf, "wb") as file: 63 | for data in response.iter_content(block_size): 64 | progress_bar.update(len(data)) 65 | file.write(data) 66 | progress_bar.close() 67 | if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: 68 | print("ERROR, something went wrong") 69 | print(f"Downloaded successfully to {outf}") 70 | p_ckpt = outf 71 | sd = torch.load(p_ckpt, map_location="cpu") 72 | unet_lora_config = LoraConfig( 73 | r=sd["rank_unet"], 74 | init_lora_weights="gaussian", 75 | target_modules=sd["unet_lora_target_modules"], 76 | ) 77 | 78 | if name == "sketch_to_image_stochastic": 79 | # download from url 80 | url = "https://www.cs.cmu.edu/~img2img-turbo/models/sketch_to_image_stochastic_lora.pkl" 81 | os.makedirs(ckpt_folder, exist_ok=True) 82 | outf = os.path.join(ckpt_folder, "sketch_to_image_stochastic_lora.pkl") 83 | if not os.path.exists(outf): 84 | print(f"Downloading checkpoint to {outf}") 85 | response = requests.get(url, stream=True) 86 | total_size_in_bytes = 
int(response.headers.get("content-length", 0)) 87 | block_size = 1024 # 1 Kibibyte 88 | progress_bar = tqdm( 89 | total=total_size_in_bytes, unit="iB", unit_scale=True 90 | ) 91 | with open(outf, "wb") as file: 92 | for data in response.iter_content(block_size): 93 | progress_bar.update(len(data)) 94 | file.write(data) 95 | progress_bar.close() 96 | if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: 97 | print("ERROR, something went wrong") 98 | print(f"Downloaded successfully to {outf}") 99 | p_ckpt = outf 100 | sd = torch.load(p_ckpt, map_location="cpu") 101 | unet_lora_config = LoraConfig( 102 | r=sd["rank_unet"], 103 | init_lora_weights="gaussian", 104 | target_modules=sd["unet_lora_target_modules"], 105 | ) 106 | convin_pretrained = copy.deepcopy(unet.conv_in) 107 | unet.conv_in = TwinConv(convin_pretrained, unet.conv_in) 108 | 109 | vae.encoder.forward = my_vae_encoder_fwd.__get__( 110 | vae.encoder, vae.encoder.__class__ 111 | ) 112 | vae.decoder.forward = my_vae_decoder_fwd.__get__( 113 | vae.decoder, vae.decoder.__class__ 114 | ) 115 | # add the skip connection convs 116 | vae.decoder.skip_conv_1 = torch.nn.Conv2d( 117 | 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False 118 | ).cuda() 119 | vae.decoder.skip_conv_2 = torch.nn.Conv2d( 120 | 256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False 121 | ).cuda() 122 | vae.decoder.skip_conv_3 = torch.nn.Conv2d( 123 | 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False 124 | ).cuda() 125 | vae.decoder.skip_conv_4 = torch.nn.Conv2d( 126 | 128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False 127 | ).cuda() 128 | vae_lora_config = LoraConfig( 129 | r=sd["rank_vae"], 130 | init_lora_weights="gaussian", 131 | target_modules=sd["vae_lora_target_modules"], 132 | ) 133 | vae.decoder.ignore_skip = False 134 | vae.add_adapter(vae_lora_config, adapter_name="vae_skip") 135 | unet.add_adapter(unet_lora_config) 136 | _sd_unet = unet.state_dict() 137 | for k in sd["state_dict_unet"]: 138 | _sd_unet[k] = sd["state_dict_unet"][k] 139 | unet.load_state_dict(_sd_unet) 140 | unet.enable_xformers_memory_efficient_attention() 141 | _sd_vae = vae.state_dict() 142 | for k in sd["state_dict_vae"]: 143 | _sd_vae[k] = sd["state_dict_vae"][k] 144 | vae.load_state_dict(_sd_vae) 145 | unet.to("cuda") 146 | vae.to("cuda") 147 | unet.eval() 148 | vae.eval() 149 | self.unet, self.vae = unet, vae 150 | self.vae.decoder.gamma = 1 151 | self.timesteps = torch.tensor([999], device="cuda").long() 152 | self.last_prompt = "" 153 | self.caption_enc = None 154 | self.device = "cuda" 155 | 156 | @torch.no_grad() 157 | def forward(self, c_t, prompt, deterministic=True, r=1.0, noise_map=1.0): 158 | # encode the text prompt 159 | if prompt != self.last_prompt: 160 | caption_tokens = self.tokenizer( 161 | prompt, 162 | max_length=self.tokenizer.model_max_length, 163 | padding="max_length", 164 | truncation=True, 165 | return_tensors="pt", 166 | ).input_ids.cuda() 167 | caption_enc = self.text_encoder(caption_tokens)[0] 168 | self.caption_enc = caption_enc 169 | self.last_prompt = prompt 170 | 171 | if deterministic: 172 | encoded_control = ( 173 | self.vae.encode(c_t).latent_dist.sample() 174 | * self.vae.config.scaling_factor 175 | ) 176 | model_pred = self.unet( 177 | encoded_control, 178 | self.timesteps, 179 | encoder_hidden_states=self.caption_enc, 180 | ).sample 181 | x_denoised = self.sched.step( 182 | model_pred, self.timesteps, encoded_control, return_dict=True 183 | ).prev_sample 184 | self.vae.decoder.incoming_skip_acts = 
self.vae.encoder.current_down_blocks 185 | output_image = ( 186 | self.vae.decode(x_denoised / self.vae.config.scaling_factor).sample 187 | ).clamp(-1, 1) 188 | else: 189 | # scale the lora weights based on the r value 190 | self.unet.set_adapters(["default"], weights=[r]) 191 | set_weights_and_activate_adapters(self.vae, ["vae_skip"], [r]) 192 | encoded_control = ( 193 | self.vae.encode(c_t).latent_dist.sample() 194 | * self.vae.config.scaling_factor 195 | ) 196 | # combine the input and noise 197 | unet_input = encoded_control * r + noise_map * (1 - r) 198 | self.unet.conv_in.r = r 199 | unet_output = self.unet( 200 | unet_input, 201 | self.timesteps, 202 | encoder_hidden_states=self.caption_enc, 203 | ).sample 204 | self.unet.conv_in.r = None 205 | x_denoised = self.sched.step( 206 | unet_output, self.timesteps, unet_input, return_dict=True 207 | ).prev_sample 208 | self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks 209 | self.vae.decoder.gamma = r 210 | output_image = ( 211 | self.vae.decode(x_denoised / self.vae.config.scaling_factor).sample 212 | ).clamp(-1, 1) 213 | return output_image 214 | -------------------------------------------------------------------------------- /server/pipelines/pix2pixTurbo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | 4 | from config import Args 5 | from pydantic import BaseModel, Field 6 | from util import ParamsModel 7 | from PIL import Image 8 | from pipelines.pix2pix.pix2pix_turbo import Pix2Pix_Turbo 9 | from pipelines.utils.canny_gpu import ScharrOperator 10 | 11 | default_prompt = "close-up photo of the joker" 12 | page_content = """ 13 |

Real-Time pix2pix_turbo 14 | pix2pix turbo 15 | 16 | This demo showcases 17 | One-Step Image Translation with Text-to-Image Models 21 | 22 | 23 | 24 | Web app 25 | Real-Time Latent Consistency Models 26 | 27 |
28 | """ 29 | 30 | 31 | class Pipeline: 32 | class Info(BaseModel): 33 | name: str = "img2img" 34 | title: str = "Image-to-Image SDXL" 35 | description: str = "Generates an image from a text prompt" 36 | input_mode: str = "image" 37 | page_content: str = page_content 38 | 39 | class InputParams(ParamsModel): 40 | prompt: str = Field( 41 | default_prompt, 42 | title="Prompt", 43 | field="textarea", 44 | id="prompt", 45 | ) 46 | 47 | width: int = Field( 48 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 49 | ) 50 | height: int = Field( 51 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 52 | ) 53 | seed: int = Field( 54 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 55 | ) 56 | noise_r: float = Field( 57 | 1.0, 58 | min=0.01, 59 | max=3.0, 60 | step=0.001, 61 | title="Noise R", 62 | field="range", 63 | hide=True, 64 | id="noise_r", 65 | ) 66 | 67 | deterministic: bool = Field( 68 | True, 69 | hide=True, 70 | title="Deterministic", 71 | field="checkbox", 72 | id="deterministic", 73 | ) 74 | canny_low_threshold: float = Field( 75 | 0.0, 76 | min=0, 77 | max=1.0, 78 | step=0.001, 79 | title="Canny Low Threshold", 80 | field="range", 81 | hide=True, 82 | id="canny_low_threshold", 83 | ) 84 | canny_high_threshold: float = Field( 85 | 1.0, 86 | min=0, 87 | max=1.0, 88 | step=0.001, 89 | title="Canny High Threshold", 90 | field="range", 91 | hide=True, 92 | id="canny_high_threshold", 93 | ) 94 | debug_canny: bool = Field( 95 | False, 96 | title="Debug Canny", 97 | field="checkbox", 98 | hide=True, 99 | id="debug_canny", 100 | ) 101 | 102 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 103 | self.model = Pix2Pix_Turbo("edge_to_image") 104 | self.canny_torch = ScharrOperator(device=device) 105 | self.device = device 106 | self.last_time = 0.0 107 | 108 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 109 | canny_pil, canny_tensor = self.canny_torch( 110 | params.image, 111 | params.canny_low_threshold, 112 | params.canny_high_threshold, 113 | output_type="pil,tensor", 114 | ) 115 | torch.manual_seed(params.seed) 116 | noise = torch.randn( 117 | (1, 4, params.width // 8, params.height // 8), device=self.device 118 | ) 119 | canny_tensor = torch.cat((canny_tensor, canny_tensor, canny_tensor), dim=1) 120 | output_image = self.model( 121 | canny_tensor, 122 | params.prompt, 123 | params.deterministic, 124 | params.noise_r, 125 | noise, 126 | ) 127 | output_pil = transforms.ToPILImage()(output_image[0].cpu() * 0.5 + 0.5) 128 | 129 | result_image = output_pil 130 | if params.debug_canny: 131 | # paste control_image on top of result_image 132 | w0, h0 = (200, 200) 133 | control_image = canny_pil.resize((w0, h0)) 134 | w1, h1 = result_image.size 135 | result_image.paste(control_image, (w1 - w0, h1 - h0)) 136 | return result_image 137 | -------------------------------------------------------------------------------- /server/pipelines/txt2img.py: -------------------------------------------------------------------------------- 1 | from diffusers import DiffusionPipeline, AutoencoderTiny 2 | from compel import Compel 3 | import torch 4 | 5 | try: 6 | import intel_extension_for_pytorch as ipex # type: ignore 7 | except: 8 | pass 9 | 10 | from config import Args 11 | from pydantic import BaseModel, Field 12 | from util import ParamsModel 13 | from PIL import Image 14 | from typing import List 15 | from pruna import SmashConfig, smash 16 | 17 | base_model = "SimianLuo/LCM_Dreamshaper_v7" 18 
| taesd_model = "madebyollin/taesd" 19 | 20 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece" 21 | 22 | page_content = """

Real-Time Latent Consistency Model 23 | Text-to-Image 24 | 25 | This demo showcases 26 | LCM 30 | Image to Image pipeline using 31 | Diffusers with a MJPEG stream server 35 | 36 | 37 | Change the prompt to generate different images, accepts Compel syntax. 42 |
""" 43 | 44 | 45 | class Pipeline: 46 | class Info(BaseModel): 47 | name: str = "txt2img" 48 | title: str = "Text-to-Image LCM" 49 | description: str = "Generates an image from a text prompt" 50 | input_mode: str = "text" 51 | page_content: str = page_content 52 | 53 | class InputParams(ParamsModel): 54 | prompt: str = Field( 55 | default_prompt, 56 | title="Prompt", 57 | field="textarea", 58 | id="prompt", 59 | ) 60 | seed: int = Field( 61 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 62 | ) 63 | steps: int = Field( 64 | 4, min=2, max=15, title="Steps", field="range", hide=True, id="steps" 65 | ) 66 | width: int = Field( 67 | 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 68 | ) 69 | height: int = Field( 70 | 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 71 | ) 72 | guidance_scale: float = Field( 73 | 8.0, 74 | min=1, 75 | max=30, 76 | step=0.001, 77 | title="Guidance Scale", 78 | field="range", 79 | hide=True, 80 | id="guidance_scale", 81 | ) 82 | 83 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 84 | self.pipe = DiffusionPipeline.from_pretrained(base_model, safety_checker=None) 85 | if args.taesd: 86 | self.pipe.vae = AutoencoderTiny.from_pretrained( 87 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 88 | ).to(device) 89 | 90 | if args.pruna: 91 | # Create and smash your model 92 | smash_config = SmashConfig() 93 | # smash_config["cacher"] = "deepcache" 94 | smash_config["compiler"] = "stable_fast" 95 | self.pipe = smash(model=self.pipe, smash_config=smash_config) 96 | 97 | if args.sfast: 98 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 99 | compile, 100 | CompilationConfig, 101 | ) 102 | 103 | config = CompilationConfig.Default() 104 | config.enable_xformers = True 105 | config.enable_triton = True 106 | config.enable_cuda_graph = True 107 | self.pipe = compile(self.pipe, config=config) 108 | 109 | self.pipe.set_progress_bar_config(disable=True) 110 | self.pipe.to(device=device, dtype=torch_dtype) 111 | if device.type != "mps": 112 | self.pipe.unet.to(memory_format=torch.channels_last) 113 | 114 | if args.torch_compile: 115 | self.pipe.unet = torch.compile( 116 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 117 | ) 118 | self.pipe.vae = torch.compile( 119 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 120 | ) 121 | 122 | self.pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0) 123 | 124 | if args.compel: 125 | self.compel_proc = Compel( 126 | tokenizer=self.pipe.tokenizer, 127 | text_encoder=self.pipe.text_encoder, 128 | truncate_long_prompts=False, 129 | ) 130 | 131 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 132 | generator = torch.manual_seed(params.seed) 133 | prompt_embeds = None 134 | prompt = params.prompt 135 | if hasattr(self, "compel_proc"): 136 | prompt_embeds = self.compel_proc(params.prompt) 137 | prompt = None 138 | 139 | results = self.pipe( 140 | prompt_embeds=prompt_embeds, 141 | prompt=prompt, 142 | generator=generator, 143 | num_inference_steps=params.steps, 144 | guidance_scale=params.guidance_scale, 145 | width=params.width, 146 | height=params.height, 147 | output_type="pil", 148 | ) 149 | 150 | return results.images[0] 151 | -------------------------------------------------------------------------------- /server/pipelines/txt2imgLora.py: -------------------------------------------------------------------------------- 1 | from diffusers import DiffusionPipeline, 
AutoencoderTiny, LCMScheduler 2 | from compel import Compel 3 | import torch 4 | 5 | try: 6 | import intel_extension_for_pytorch as ipex # type: ignore 7 | except: 8 | pass 9 | 10 | import psutil 11 | from config import Args 12 | from pydantic import BaseModel, Field 13 | from util import ParamsModel 14 | from PIL import Image 15 | from pruna import SmashConfig, smash 16 | 17 | base_model = "wavymulder/Analog-Diffusion" 18 | lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5" 19 | taesd_model = "madebyollin/taesd" 20 | 21 | default_prompt = "Analog style photograph of young Harrison Ford as Han Solo, star wars behind the scenes" 22 | 23 | page_content = """ 24 |

Real-Time Latent Consistency Model SDv1.5 25 | Text-to-Image LCM + LoRa 26 | 27 | This demo showcases 28 | LCM 32 | Image to Image pipeline using 33 | Diffusers with a MJPEG stream server. Featuring Analog-Diffusion 41 | 42 | 43 | Change the prompt to generate different images, accepts Compel syntax. 48 |
49 | """ 50 | 51 | 52 | class Pipeline: 53 | class Info(BaseModel): 54 | name: str = "controlnet" 55 | title: str = "Text-to-Image LCM + LoRa" 56 | description: str = "Generates an image from a text prompt" 57 | input_mode: str = "text" 58 | page_content: str = page_content 59 | 60 | class InputParams(ParamsModel): 61 | prompt: str = Field( 62 | default_prompt, 63 | title="Prompt", 64 | field="textarea", 65 | id="prompt", 66 | ) 67 | seed: int = Field( 68 | 8638236174640251, min=0, title="Seed", field="seed", hide=True, id="seed" 69 | ) 70 | steps: int = Field( 71 | 4, min=2, max=15, title="Steps", field="range", hide=True, id="steps" 72 | ) 73 | width: int = Field( 74 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 75 | ) 76 | height: int = Field( 77 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 78 | ) 79 | guidance_scale: float = Field( 80 | 0.2, 81 | min=0, 82 | max=4, 83 | step=0.001, 84 | title="Guidance Scale", 85 | field="range", 86 | hide=True, 87 | id="guidance_scale", 88 | ) 89 | 90 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 91 | self.pipe = DiffusionPipeline.from_pretrained(base_model, safety_checker=None) 92 | if args.taesd: 93 | self.pipe.vae = AutoencoderTiny.from_pretrained( 94 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 95 | ).to(device) 96 | 97 | if args.pruna: 98 | # Create and smash your model 99 | smash_config = SmashConfig() 100 | # smash_config["cacher"] = "deepcache" 101 | smash_config["compiler"] = "stable_fast" 102 | self.pipe = smash(model=self.pipe, smash_config=smash_config) 103 | 104 | self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config) 105 | self.pipe.set_progress_bar_config(disable=True) 106 | self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm") 107 | self.pipe.to(device=device, dtype=torch_dtype) 108 | 109 | if device.type != "mps": 110 | self.pipe.unet.to(memory_format=torch.channels_last) 111 | 112 | if args.torch_compile: 113 | self.pipe.unet = torch.compile( 114 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 115 | ) 116 | self.pipe.vae = torch.compile( 117 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 118 | ) 119 | 120 | self.pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0) 121 | 122 | if args.sfast: 123 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 124 | compile, 125 | CompilationConfig, 126 | ) 127 | 128 | config = CompilationConfig.Default() 129 | config.enable_xformers = True 130 | config.enable_triton = True 131 | config.enable_cuda_graph = True 132 | self.pipe = compile(self.pipe, config=config) 133 | 134 | if args.compel: 135 | self.compel_proc = Compel( 136 | tokenizer=self.pipe.tokenizer, 137 | text_encoder=self.pipe.text_encoder, 138 | truncate_long_prompts=False, 139 | ) 140 | 141 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 142 | generator = torch.manual_seed(params.seed) 143 | prompt_embeds = None 144 | prompt = params.prompt 145 | if hasattr(self, "compel_proc"): 146 | prompt_embeds = self.compel_proc(params.prompt) 147 | prompt = None 148 | 149 | results = self.pipe( 150 | prompt=prompt, 151 | prompt_embeds=prompt_embeds, 152 | generator=generator, 153 | num_inference_steps=params.steps, 154 | guidance_scale=params.guidance_scale, 155 | width=params.width, 156 | height=params.height, 157 | output_type="pil", 158 | ) 159 | 160 | return results.images[0] 161 | 
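The pipeline modules above all follow the same contract: the class is constructed with (args, device, torch_dtype) and an image is produced by predict(InputParams). As a rough illustration only, the sketch below shows how such a module could be loaded through util.get_pipeline_class and invoked once; the run_once helper, the example prompt, the "txt2imgLora" module name, and the assumption that `args` is an already-parsed config.Args instance are hypothetical and not code from this repository.

# Hypothetical driver sketch; assumes a parsed config.Args instance and, for
# img2img pipelines, a PIL input frame. Not part of the repository.
import torch
from PIL import Image
from util import get_pipeline_class


def run_once(args, pipeline_name: str = "txt2imgLora",
             input_image: Image.Image | None = None) -> Image.Image:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch_dtype = torch.float16 if device.type == "cuda" else torch.float32

    PipelineClass = get_pipeline_class(pipeline_name)    # dynamic import from server/pipelines
    pipeline = PipelineClass(args, device, torch_dtype)  # loads weights, applies optional optimizations

    # InputParams carries the defaults declared with Field(); extra keys such as
    # "image" are accepted because ParamsModel sets extra="allow".
    data = {"prompt": "a watercolor portrait of an astronaut"}
    if input_image is not None:
        data["image"] = input_image
    params = PipelineClass.InputParams.from_dict(data)

    return pipeline.predict(params)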
-------------------------------------------------------------------------------- /server/pipelines/txt2imgLoraSDXL.py: -------------------------------------------------------------------------------- 1 | from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderKL, AutoencoderTiny 2 | from compel import Compel, ReturnedEmbeddingsType 3 | import torch 4 | 5 | try: 6 | import intel_extension_for_pytorch as ipex # type: ignore 7 | except: 8 | pass 9 | 10 | import psutil 11 | from config import Args 12 | from pydantic import BaseModel, Field 13 | from util import ParamsModel 14 | from PIL import Image 15 | 16 | model_id = "stabilityai/stable-diffusion-xl-base-1.0" 17 | lcm_lora_id = "latent-consistency/lcm-lora-sdxl" 18 | taesd_model = "madebyollin/taesdxl" 19 | 20 | 21 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux" 22 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated" 23 | page_content = """ 24 |

Real-Time Latent Consistency Model 25 | Text-to-Image SDXL + LCM + LoRA 26 | 27 | This demo showcases 28 | LCM LoRA 33 | Text to Image pipeline using 34 | Diffusers with a MJPEG stream server. 39 | 40 | 41 | Change the prompt to generate different images, accepts Compel syntax. 46 |
47 | """ 48 | 49 | 50 | class Pipeline: 51 | class Info(BaseModel): 52 | name: str = "LCM+Lora+SDXL" 53 | title: str = "Text-to-Image SDXL + LCM + LoRA" 54 | description: str = "Generates an image from a text prompt" 55 | page_content: str = page_content 56 | input_mode: str = "text" 57 | 58 | class InputParams(ParamsModel): 59 | prompt: str = Field( 60 | default_prompt, 61 | title="Prompt", 62 | field="textarea", 63 | id="prompt", 64 | ) 65 | negative_prompt: str = Field( 66 | default_negative_prompt, 67 | title="Negative Prompt", 68 | field="textarea", 69 | id="negative_prompt", 70 | hide=True, 71 | ) 72 | seed: int = Field( 73 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" 74 | ) 75 | steps: int = Field( 76 | 4, min=1, max=15, title="Steps", field="range", hide=True, id="steps" 77 | ) 78 | width: int = Field( 79 | 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width" 80 | ) 81 | height: int = Field( 82 | 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height" 83 | ) 84 | guidance_scale: float = Field( 85 | 1.0, 86 | min=0, 87 | max=20, 88 | step=0.001, 89 | title="Guidance Scale", 90 | field="range", 91 | hide=True, 92 | id="guidance_scale", 93 | ) 94 | 95 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): 96 | vae = AutoencoderKL.from_pretrained( 97 | "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype 98 | ) 99 | 100 | self.pipe = DiffusionPipeline.from_pretrained( 101 | model_id, 102 | safety_checker=None, 103 | vae=vae, 104 | ) 105 | # Load LCM LoRA 106 | self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm") 107 | self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config) 108 | self.pipe.set_progress_bar_config(disable=True) 109 | self.pipe.to(device=device, dtype=torch_dtype).to(device) 110 | 111 | if args.sfast: 112 | from sfast.compilers.stable_diffusion_pipeline_compiler import ( 113 | compile, 114 | CompilationConfig, 115 | ) 116 | 117 | config = CompilationConfig.Default() 118 | config.enable_xformers = True 119 | config.enable_triton = True 120 | config.enable_cuda_graph = True 121 | self.pipe = compile(self.pipe, config=config) 122 | 123 | if device.type != "mps": 124 | self.pipe.unet.to(memory_format=torch.channels_last) 125 | 126 | self.pipe.compel_proc = Compel( 127 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2], 128 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], 129 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, 130 | requires_pooled=[False, True], 131 | ) 132 | if args.taesd: 133 | self.pipe.vae = AutoencoderTiny.from_pretrained( 134 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True 135 | ).to(device) 136 | 137 | if args.torch_compile: 138 | self.pipe.unet = torch.compile( 139 | self.pipe.unet, mode="reduce-overhead", fullgraph=True 140 | ) 141 | self.pipe.vae = torch.compile( 142 | self.pipe.vae, mode="reduce-overhead", fullgraph=True 143 | ) 144 | self.pipe( 145 | prompt="warmup", 146 | ) 147 | 148 | def predict(self, params: "Pipeline.InputParams") -> Image.Image: 149 | generator = torch.manual_seed(params.seed) 150 | 151 | prompt = params.prompt 152 | negative_prompt = params.negative_prompt 153 | prompt_embeds = None 154 | pooled_prompt_embeds = None 155 | negative_prompt_embeds = None 156 | negative_pooled_prompt_embeds = None 157 | if hasattr(self.pipe, "compel_proc"): 158 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc( 159 | [params.prompt, 
params.negative_prompt] 160 | ) 161 | prompt = None 162 | negative_prompt = None 163 | prompt_embeds = _prompt_embeds[0:1] 164 | pooled_prompt_embeds = pooled_prompt_embeds[0:1] 165 | negative_prompt_embeds = _prompt_embeds[1:2] 166 | negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2] 167 | 168 | results = self.pipe( 169 | prompt=prompt, 170 | negative_prompt=negative_prompt, 171 | prompt_embeds=prompt_embeds, 172 | pooled_prompt_embeds=pooled_prompt_embeds, 173 | negative_prompt_embeds=negative_prompt_embeds, 174 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, 175 | generator=generator, 176 | num_inference_steps=params.steps, 177 | guidance_scale=params.guidance_scale, 178 | width=params.width, 179 | height=params.height, 180 | output_type="pil", 181 | ) 182 | 183 | return results.images[0] 184 | -------------------------------------------------------------------------------- /server/pipelines/utils/canny_gpu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision.transforms import ToTensor, ToPILImage 4 | from PIL import Image 5 | 6 | 7 | class SobelOperator(nn.Module): 8 | SOBEL_KERNEL_X = torch.tensor( 9 | [[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]] 10 | ) 11 | SOBEL_KERNEL_Y = torch.tensor( 12 | [[-1.0, -2.0, -1.0], [0.0, 0.0, 0.0], [1.0, 2.0, 1.0]] 13 | ) 14 | 15 | def __init__(self, device="cuda"): 16 | super(SobelOperator, self).__init__() 17 | self.device = device 18 | self.edge_conv_x = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to( 19 | self.device 20 | ) 21 | self.edge_conv_y = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to( 22 | self.device 23 | ) 24 | self.edge_conv_x.weight = nn.Parameter( 25 | self.SOBEL_KERNEL_X.view((1, 1, 3, 3)).to(self.device) 26 | ) 27 | self.edge_conv_y.weight = nn.Parameter( 28 | self.SOBEL_KERNEL_Y.view((1, 1, 3, 3)).to(self.device) 29 | ) 30 | 31 | @torch.no_grad() 32 | def forward( 33 | self, 34 | image: Image.Image, 35 | low_threshold: float, 36 | high_threshold: float, 37 | output_type="pil", 38 | ) -> Image.Image | torch.Tensor | tuple[Image.Image, torch.Tensor]: 39 | # Convert PIL image to PyTorch tensor 40 | image_gray = image.convert("L") 41 | image_tensor = ToTensor()(image_gray).unsqueeze(0).to(self.device) 42 | 43 | # Compute gradients 44 | edge_x = self.edge_conv_x(image_tensor) 45 | edge_y = self.edge_conv_y(image_tensor) 46 | edge = torch.sqrt(torch.square(edge_x) + torch.square(edge_y)) 47 | 48 | # Apply thresholding 49 | edge.div_(edge.max()) # Normalize to 0-1 (in-place operation) 50 | edge[edge >= high_threshold] = 1.0 51 | edge[edge <= low_threshold] = 0.0 52 | 53 | # Convert the result back to a PIL image 54 | if output_type == "pil": 55 | return ToPILImage()(edge.squeeze(0).cpu()) 56 | elif output_type == "tensor": 57 | return edge 58 | elif output_type == "pil,tensor": 59 | return ToPILImage()(edge.squeeze(0).cpu()), edge 60 | 61 | 62 | class ScharrOperator(nn.Module): 63 | SCHARR_KERNEL_X = torch.tensor( 64 | [[-3.0, 0.0, 3.0], [-10.0, 0.0, 10.0], [-3.0, 0.0, 3.0]] 65 | ) 66 | SCHARR_KERNEL_Y = torch.tensor( 67 | [[-3.0, -10.0, -3.0], [0.0, 0.0, 0.0], [3.0, 10.0, 3.0]] 68 | ) 69 | 70 | def __init__(self, device="cuda"): 71 | super(ScharrOperator, self).__init__() 72 | self.device = device 73 | self.edge_conv_x = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to( 74 | self.device 75 | ) 76 | self.edge_conv_y = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to( 77 | 
self.device 78 | ) 79 | self.edge_conv_x.weight = nn.Parameter( 80 | self.SCHARR_KERNEL_X.view((1, 1, 3, 3)).to(self.device) 81 | ) 82 | self.edge_conv_y.weight = nn.Parameter( 83 | self.SCHARR_KERNEL_Y.view((1, 1, 3, 3)).to(self.device) 84 | ) 85 | 86 | @torch.no_grad() 87 | def forward( 88 | self, 89 | image: Image.Image, 90 | low_threshold: float, 91 | high_threshold: float, 92 | output_type="pil", 93 | invert: bool = False, 94 | ) -> Image.Image | torch.Tensor | tuple[Image.Image, torch.Tensor]: 95 | # Convert PIL image to PyTorch tensor 96 | image_gray = image.convert("L") 97 | image_tensor = ToTensor()(image_gray).unsqueeze(0).to(self.device) 98 | 99 | # Compute gradients 100 | edge_x = self.edge_conv_x(image_tensor) 101 | edge_y = self.edge_conv_y(image_tensor) 102 | edge = torch.abs(edge_x) + torch.abs(edge_y) 103 | 104 | # Apply thresholding 105 | edge.div_(edge.max()) # Normalize to 0-1 (in-place operation) 106 | edge[edge >= high_threshold] = 1.0 107 | edge[edge <= low_threshold] = 0.0 108 | if invert: 109 | edge = 1 - edge 110 | 111 | # Convert the result back to a PIL image 112 | if output_type == "pil": 113 | return ToPILImage()(edge.squeeze(0).cpu()) 114 | elif output_type == "tensor": 115 | return edge 116 | elif output_type == "pil,tensor": 117 | return ToPILImage()(edge.squeeze(0).cpu()), edge 118 | -------------------------------------------------------------------------------- /server/pipelines/utils/safety_checker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch 16 | import torch.nn as nn 17 | from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel 18 | from PIL import Image 19 | 20 | 21 | def cosine_distance(image_embeds, text_embeds): 22 | normalized_image_embeds = nn.functional.normalize(image_embeds) 23 | normalized_text_embeds = nn.functional.normalize(text_embeds) 24 | return torch.mm(normalized_image_embeds, normalized_text_embeds.t()) 25 | 26 | 27 | class StableDiffusionSafetyChecker(PreTrainedModel): 28 | config_class = CLIPConfig 29 | 30 | _no_split_modules = ["CLIPEncoderLayer"] 31 | 32 | def __init__(self, config: CLIPConfig): 33 | super().__init__(config) 34 | 35 | self.vision_model = CLIPVisionModel(config.vision_config) 36 | self.visual_projection = nn.Linear( 37 | config.vision_config.hidden_size, config.projection_dim, bias=False 38 | ) 39 | 40 | self.concept_embeds = nn.Parameter( 41 | torch.ones(17, config.projection_dim), requires_grad=False 42 | ) 43 | self.special_care_embeds = nn.Parameter( 44 | torch.ones(3, config.projection_dim), requires_grad=False 45 | ) 46 | 47 | self.concept_embeds_weights = nn.Parameter(torch.ones(17), requires_grad=False) 48 | self.special_care_embeds_weights = nn.Parameter( 49 | torch.ones(3), requires_grad=False 50 | ) 51 | 52 | @torch.no_grad() 53 | def forward(self, clip_input, images): 54 | pooled_output = self.vision_model(clip_input)[1] # pooled_output 55 | image_embeds = self.visual_projection(pooled_output) 56 | 57 | # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 58 | special_cos_dist = ( 59 | cosine_distance(image_embeds, self.special_care_embeds) 60 | .cpu() 61 | .float() 62 | .numpy() 63 | ) 64 | cos_dist = ( 65 | cosine_distance(image_embeds, self.concept_embeds).cpu().float().numpy() 66 | ) 67 | 68 | result = [] 69 | batch_size = image_embeds.shape[0] 70 | for i in range(batch_size): 71 | result_img = { 72 | "special_scores": {}, 73 | "special_care": [], 74 | "concept_scores": {}, 75 | "bad_concepts": [], 76 | } 77 | 78 | # increase this value to create a stronger `nfsw` filter 79 | # at the cost of increasing the possibility of filtering benign images 80 | adjustment = 0.0 81 | 82 | for concept_idx in range(len(special_cos_dist[0])): 83 | concept_cos = special_cos_dist[i][concept_idx] 84 | concept_threshold = self.special_care_embeds_weights[concept_idx].item() 85 | result_img["special_scores"][concept_idx] = round( 86 | concept_cos - concept_threshold + adjustment, 3 87 | ) 88 | if result_img["special_scores"][concept_idx] > 0: 89 | result_img["special_care"].append( 90 | {concept_idx, result_img["special_scores"][concept_idx]} 91 | ) 92 | adjustment = 0.01 93 | 94 | for concept_idx in range(len(cos_dist[0])): 95 | concept_cos = cos_dist[i][concept_idx] 96 | concept_threshold = self.concept_embeds_weights[concept_idx].item() 97 | result_img["concept_scores"][concept_idx] = round( 98 | concept_cos - concept_threshold + adjustment, 3 99 | ) 100 | if result_img["concept_scores"][concept_idx] > 0: 101 | result_img["bad_concepts"].append(concept_idx) 102 | 103 | result.append(result_img) 104 | 105 | has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result] 106 | 107 | return has_nsfw_concepts 108 | 109 | @torch.no_grad() 110 | def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor): 111 | pooled_output = self.vision_model(clip_input)[1] # pooled_output 112 | image_embeds = self.visual_projection(pooled_output) 113 | 114 | special_cos_dist = 
cosine_distance(image_embeds, self.special_care_embeds) 115 | cos_dist = cosine_distance(image_embeds, self.concept_embeds) 116 | 117 | # increase this value to create a stronger `nsfw` filter 118 | # at the cost of increasing the possibility of filtering benign images 119 | adjustment = 0.0 120 | 121 | special_scores = ( 122 | special_cos_dist - self.special_care_embeds_weights + adjustment 123 | ) 124 | # special_scores = special_scores.round(decimals=3) 125 | special_care = torch.any(special_scores > 0, dim=1) 126 | special_adjustment = special_care * 0.01 127 | special_adjustment = special_adjustment.unsqueeze(1).expand( 128 | -1, cos_dist.shape[1] 129 | ) 130 | 131 | concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment 132 | # concept_scores = concept_scores.round(decimals=3) 133 | has_nsfw_concepts = torch.any(concept_scores > 0, dim=1) 134 | 135 | images[has_nsfw_concepts] = 0.0 # black image 136 | 137 | return images, has_nsfw_concepts 138 | 139 | 140 | class SafetyChecker: 141 | def __init__(self, device="cuda"): 142 | from transformers import CLIPFeatureExtractor 143 | 144 | self.device = device 145 | self.safety_checker = StableDiffusionSafetyChecker.from_pretrained( 146 | "CompVis/stable-diffusion-safety-checker" 147 | ).to(device) 148 | self.feature_extractor = CLIPFeatureExtractor.from_pretrained( 149 | "openai/clip-vit-base-patch32" 150 | ) 151 | 152 | def __call__( 153 | self, images: list[Image.Image] | Image.Image 154 | ) -> tuple[list[Image.Image], list[bool]] | tuple[Image.Image, bool]: 155 | images_list = [images] if isinstance(images, Image.Image) else images 156 | 157 | safety_checker_input = self.feature_extractor( 158 | images_list, return_tensors="pt" 159 | ).to(self.device) 160 | 161 | has_nsfw_concepts = self.safety_checker( 162 | images=[images_list], 163 | clip_input=safety_checker_input.pixel_values.to(self.device), 164 | ) 165 | 166 | if isinstance(images, Image.Image): 167 | return images, has_nsfw_concepts[0] 168 | 169 | return images, has_nsfw_concepts 170 | -------------------------------------------------------------------------------- /server/requirements.txt: -------------------------------------------------------------------------------- 1 | # Use with: uv pip install --no-cache --system --index-strategy=unsafe-best-match -r requirements.txt 2 | numpy 3 | diffusers<=0.33.1 4 | llvmlite>=0.39.0 5 | numba>=0.56.0 6 | transformers 7 | pydantic 8 | huggingface-hub 9 | hf_transfer 10 | fastapi 11 | uvicorn[standard] 12 | Pillow==11.0.0 13 | accelerate 14 | compel==2.0.2 15 | controlnet-aux==0.0.9 16 | peft 17 | markdown2 18 | safetensors 19 | setuptools 20 | mpmath 21 | controlnet-aux 22 | sentencepiece==0.2.0 23 | optimum-quanto # has to be optimum-quanto==0.2.5 for pruna int4 24 | gguf 25 | types-Pillow 26 | mypy 27 | python-dotenv 28 | requests>=2.31.0 # Added explicitly to resolve dependency conflict 29 | 30 | --extra-index-url https://download.pytorch.org/whl/cu118 31 | torch==2.5.1 32 | torchvision 33 | torchaudio 34 | xformers; sys_platform != 'darwin' or platform_machine != 'arm64' 35 | pruna[stable-fast] ; sys_platform != 'darwin' or platform_machine != 'arm64' 36 | 37 | # stable_fast @ https://github.com/chengzeyi/stable-fast/releases/download/nightly/stable_fast-1.0.5.dev20241127+torch230cu121-cp310-cp310-manylinux2014_x86_64.whl ; sys_platform != 'darwin' or platform_machine != 'arm64' 38 | #oneflow @ 
https://github.com/siliconflow/oneflow_releases/releases/download/community_cu121/oneflow-0.9.1.dev20241114%2Bcu121-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl ; sys_platform != 'darwin' or platform_machine != 'arm64' 39 | #onediff @ git+https://github.com/siliconflow/onediff.git@main#egg=onediff ; sys_platform != 'darwin' or platform_machine != 'arm64' -------------------------------------------------------------------------------- /server/util.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | from typing import Any, TypeVar 3 | from PIL import Image 4 | import io 5 | from pydantic import BaseModel 6 | 7 | 8 | # Used only for type checking the pipeline class 9 | TPipeline = TypeVar("TPipeline", bound=type[Any]) 10 | 11 | 12 | class ParamsModel(BaseModel): 13 | """Base model for pipeline parameters.""" 14 | 15 | model_config = { 16 | "arbitrary_types_allowed": True, 17 | "extra": "allow", # Allow extra attributes for dynamic fields like 'image' 18 | } 19 | 20 | @classmethod 21 | def from_dict(cls, data: dict[str, Any]) -> "ParamsModel": 22 | """Create a model instance from dictionary data.""" 23 | return cls.model_validate(data) 24 | 25 | def to_dict(self) -> dict[str, Any]: 26 | """Convert model to dictionary.""" 27 | return self.model_dump() 28 | 29 | 30 | def get_pipeline_class(pipeline_name: str) -> type: 31 | """ 32 | Dynamically imports and returns the Pipeline class from a specified module. 33 | 34 | Args: 35 | pipeline_name: The name of the pipeline module to import 36 | 37 | Returns: 38 | The Pipeline class from the specified module 39 | 40 | Raises: 41 | ValueError: If the module or Pipeline class isn't found 42 | TypeError: If Pipeline is not a class 43 | """ 44 | try: 45 | module = import_module(f"pipelines.{pipeline_name}") 46 | except ModuleNotFoundError: 47 | raise ValueError(f"Pipeline {pipeline_name} module not found") 48 | 49 | pipeline_class = getattr(module, "Pipeline", None) 50 | 51 | if pipeline_class is None: 52 | raise ValueError(f"'Pipeline' class not found in module '{pipeline_name}'.") 53 | 54 | # Type check to ensure we're returning a class 55 | if not isinstance(pipeline_class, type): 56 | raise TypeError(f"'Pipeline' in module '{pipeline_name}' is not a class") 57 | 58 | return pipeline_class 59 | 60 | 61 | def bytes_to_pil(image_bytes: bytes) -> Image.Image: 62 | image = Image.open(io.BytesIO(image_bytes)) 63 | return image 64 | 65 | 66 | def pil_to_frame(image: Image.Image) -> bytes: 67 | frame_data = io.BytesIO() 68 | image.save(frame_data, format="JPEG", quality=80, optimize=True, progressive=True) 69 | frame_data = frame_data.getvalue() 70 | return ( 71 | b"--frame\r\n" 72 | + b"Content-Type: image/jpeg\r\n" 73 | + f"Content-Length: {len(frame_data)}\r\n\r\n".encode() 74 | + frame_data 75 | + b"\r\n" 76 | ) 77 | 78 | 79 | def is_firefox(user_agent: str) -> bool: 80 | return "Firefox" in user_agent 81 | --------------------------------------------------------------------------------
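util.pil_to_frame wraps each JPEG in a multipart part delimited by a --frame boundary, which is what an MJPEG stream over HTTP expects. The sketch below is a hedged illustration of serving those frames with FastAPI's StreamingResponse (FastAPI is listed in requirements.txt); it is not the repository's main.py, and the /stream route, the frame_source generator, and the placeholder image are made up for demonstration.

# Illustrative MJPEG endpoint sketch; stands in for the real Pipeline.predict() loop.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from PIL import Image
from util import pil_to_frame

app = FastAPI()


def frame_source():
    # Each yielded chunk is a complete multipart part: the --frame boundary,
    # JPEG headers, then the encoded image bytes, exactly as pil_to_frame() builds it.
    while True:
        yield pil_to_frame(Image.new("RGB", (512, 512), "gray"))


@app.get("/stream")
def stream():
    # The boundary declared here must match the b"--frame" marker written by pil_to_frame().
    return StreamingResponse(
        frame_source(), media_type="multipart/x-mixed-replace; boundary=frame"
    )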