├── .gitattributes
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── build-run.sh
├── frontend
│   ├── .gitignore
│   ├── .npmrc
│   ├── .nvmrc
│   ├── .prettierignore
│   ├── .prettierrc
│   ├── README.md
│   ├── package-lock.json
│   ├── package.json
│   ├── src
│   │   ├── app.css
│   │   ├── app.d.ts
│   │   ├── app.html
│   │   ├── lib
│   │   │   ├── components
│   │   │   │   ├── AspectRatioSelect.svelte
│   │   │   │   ├── Button.svelte
│   │   │   │   ├── Checkbox.svelte
│   │   │   │   ├── ImagePlayer.svelte
│   │   │   │   ├── InputRange.svelte
│   │   │   │   ├── MediaListSwitcher.svelte
│   │   │   │   ├── PipelineOptions.svelte
│   │   │   │   ├── SeedInput.svelte
│   │   │   │   ├── Selectlist.svelte
│   │   │   │   ├── TextArea.svelte
│   │   │   │   ├── VideoInput.svelte
│   │   │   │   └── Warning.svelte
│   │   │   ├── icons
│   │   │   │   ├── aspect.svelte
│   │   │   │   ├── expand.svelte
│   │   │   │   ├── floppy.svelte
│   │   │   │   ├── screen.svelte
│   │   │   │   └── spinner.svelte
│   │   │   ├── index.ts
│   │   │   ├── lcmLive.ts
│   │   │   ├── mediaStream.ts
│   │   │   ├── store.ts
│   │   │   ├── types.ts
│   │   │   └── utils.ts
│   │   ├── piexifjs.d.ts
│   │   └── routes
│   │       ├── +layout.svelte
│   │       ├── +page.svelte
│   │       └── +page.ts
│   ├── static
│   │   └── favicon.png
│   ├── svelte.config.js
│   ├── tailwind.config.js
│   ├── tsconfig.json
│   └── vite.config.ts
├── qr-code.png
└── server
    ├── config.py
    ├── connection_manager.py
    ├── device.py
    ├── main.py
    ├── pipelines
    │   ├── IPcompositionHyperSD15.py
    │   ├── IPcompositionHyperSDXL.py
    │   ├── __init__.py
    │   ├── controlnet.py
    │   ├── controlnetDepthFlashSD.py
    │   ├── controlnetDepthHyperSD.py
    │   ├── controlnetDepthHyperSDXL.py
    │   ├── controlnetFlashSD.py
    │   ├── controlnetFlashSDXL.py
    │   ├── controlnetHyperSD.py
    │   ├── controlnetHyperSDXL.py
    │   ├── controlnetLoraSD15.py
    │   ├── controlnetLoraSD15QRCode.py
    │   ├── controlnetLoraSDXL-Lightning.py
    │   ├── controlnetLoraSDXL.py
    │   ├── controlnetMistoLineHyperSDXL.py
    │   ├── controlnetPCMSD15.py
    │   ├── controlnetSDTurbo.py
    │   ├── controlnetSDXLTurbo.py
    │   ├── controlnetSegmindVegaRT.py
    │   ├── img2img.py
    │   ├── img2imgFlux.py
    │   ├── img2imgSDTurbo.py
    │   ├── img2imgSDXL-Lightning.py
    │   ├── img2imgSDXLTurbo.py
    │   ├── img2imgSDXS512.py
    │   ├── img2imgSegmindVegaRT.py
    │   ├── pix2pix
    │   │   ├── __init__.py
    │   │   ├── model.py
    │   │   └── pix2pix_turbo.py
    │   ├── pix2pixTurbo.py
    │   ├── txt2img.py
    │   ├── txt2imgLora.py
    │   ├── txt2imgLoraSDXL.py
    │   └── utils
    │       ├── canny_gpu.py
    │       └── safety_checker.py
    ├── requirements.txt
    └── util.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.7z filter=lfs diff=lfs merge=lfs -text
2 | *.arrow filter=lfs diff=lfs merge=lfs -text
3 | *.bin filter=lfs diff=lfs merge=lfs -text
4 | *.bz2 filter=lfs diff=lfs merge=lfs -text
5 | *.ckpt filter=lfs diff=lfs merge=lfs -text
6 | *.ftz filter=lfs diff=lfs merge=lfs -text
7 | *.gz filter=lfs diff=lfs merge=lfs -text
8 | *.h5 filter=lfs diff=lfs merge=lfs -text
9 | *.joblib filter=lfs diff=lfs merge=lfs -text
10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text
11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text
12 | *.model filter=lfs diff=lfs merge=lfs -text
13 | *.msgpack filter=lfs diff=lfs merge=lfs -text
14 | *.npy filter=lfs diff=lfs merge=lfs -text
15 | *.npz filter=lfs diff=lfs merge=lfs -text
16 | *.onnx filter=lfs diff=lfs merge=lfs -text
17 | *.ot filter=lfs diff=lfs merge=lfs -text
18 | *.parquet filter=lfs diff=lfs merge=lfs -text
19 | *.pb filter=lfs diff=lfs merge=lfs -text
20 | *.pickle filter=lfs diff=lfs merge=lfs -text
21 | *.pkl filter=lfs diff=lfs merge=lfs -text
22 | *.pt filter=lfs diff=lfs merge=lfs -text
23 | *.pth filter=lfs diff=lfs merge=lfs -text
24 | *.rar filter=lfs diff=lfs merge=lfs -text
25 | *.safetensors filter=lfs diff=lfs merge=lfs -text
26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27 | *.tar.* filter=lfs diff=lfs merge=lfs -text
28 | *.tar filter=lfs diff=lfs merge=lfs -text
29 | *.tflite filter=lfs diff=lfs merge=lfs -text
30 | *.tgz filter=lfs diff=lfs merge=lfs -text
31 | *.wasm filter=lfs diff=lfs merge=lfs -text
32 | *.xz filter=lfs diff=lfs merge=lfs -text
33 | *.zip filter=lfs diff=lfs merge=lfs -text
34 | *.zst filter=lfs diff=lfs merge=lfs -text
35 | *tfevents* filter=lfs diff=lfs merge=lfs -text
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | venv/
3 | public/
4 | *.pem
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
2 |
3 | ARG DEBIAN_FRONTEND=noninteractive
4 |
5 | ENV PYTHONUNBUFFERED=1
6 | ENV NODE_MAJOR=20
7 |
8 | RUN apt-get update && apt-get install --no-install-recommends -y \
9 | build-essential \
10 | python3.9 \
11 | python3-pip \
12 | python3-dev \
13 | git \
14 | ffmpeg \
15 | google-perftools \
16 | ca-certificates curl gnupg \
17 | && apt-get clean && rm -rf /var/lib/apt/lists/*
18 |
19 | WORKDIR /code
20 |
21 | RUN mkdir -p /etc/apt/keyrings
22 | RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
23 |
24 | RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_MAJOR}.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list > /dev/null
25 | RUN apt-get update && apt-get install nodejs -y
26 |
27 | COPY ./server/requirements.txt /code/requirements.txt
28 |
29 | # Download and install UV
30 | ADD https://astral.sh/uv/install.sh /uv-installer.sh
31 | RUN chmod +x /uv-installer.sh && \
32 | /uv-installer.sh && \
33 | rm /uv-installer.sh
34 |
35 | ENV PATH="/root/.local/bin:$PATH"
36 |
37 | # Set up a new user named "user" with user ID 1000
38 | RUN useradd -m -u 1000 user
39 |
40 | # Install dependencies using UV as root
41 | RUN uv pip install --no-cache --system --index-strategy=unsafe-best-match -r /code/requirements.txt
42 |
43 | # Switch to the "user" user
44 | USER user
45 |
46 | # Set home to the user's home directory
47 | ENV HOME=/home/user \
48 | PATH=/home/user/.local/bin:/root/.local/bin:$PATH \
49 | PYTHONPATH=$HOME/app \
50 | PYTHONUNBUFFERED=1 \
51 | SYSTEM=spaces
52 |
53 | # Set the working directory to the user's home directory
54 | WORKDIR $HOME/app
55 |
56 | # Copy the current directory contents into the container at $HOME/app setting the owner to the user
57 | COPY --chown=user . $HOME/app
58 |
59 | ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
60 | CMD ["./build-run.sh"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Real-Time Latent Consistency Model Image-to-Image ControlNet
3 | emoji: 🖼️🖼️
4 | colorFrom: gray
5 | colorTo: indigo
6 | sdk: docker
7 | pinned: false
8 | suggested_hardware: a10g-small
9 | disable_embedding: true
10 | ---
11 |
12 | # Real-Time Latent Consistency Model
13 |
14 | This demo showcases [Latent Consistency Model (LCM)](https://latent-consistency-models.github.io/) using [Diffusers](https://huggingface.co/docs/diffusers/using-diffusers/lcm) with a MJPEG stream server. You can read more about LCM + LoRAs with diffusers [here](https://huggingface.co/blog/lcm_lora).
15 |
16 | You need a webcam to run this demo. 🤗
17 |
18 | See a collection of live demos [here](https://huggingface.co/collections/latent-consistency/latent-consistency-model-demos-654e90c52adb0688a0acbe6f).
19 |
20 | ## Running Locally
21 |
22 | You need Python 3.10 and Node >= 19, plus a supported GPU backend: CUDA, a Mac with an M1/M2/M3 chip (MPS), or an Intel Arc GPU (XPU).
23 |
24 |
25 | ## Install
26 |
27 | ```bash
28 | uv venv --python=3.10
29 | source .venv/bin/activate
30 | uv pip install -r server/requirements.txt
31 | cd frontend && npm install && npm run build && cd ..
32 | python server/main.py --reload --pipeline img2imgSDTurbo
33 | ```
34 |
35 | Don't forget to build the frontend!
36 |
37 | ```bash
38 | cd frontend && npm install && npm run build && cd ..
39 | ```
40 |
41 | # Pipelines
42 | You can build your own pipeline by following the examples [here](pipelines); a minimal sketch of the expected `Pipeline` class is shown below.
43 |
44 |
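Each pipeline is a Python module in `server/pipelines` that exposes a `Pipeline` class with an `Info` model, an `InputParams` model, and a `predict` method, as the existing pipelines do. Here is a minimal, illustrative sketch; the module name, default values, and model loading are placeholders, not code from this repo:

```python
# server/pipelines/myPipeline.py -- hypothetical module name, for illustration only
import torch
from PIL import Image
from pydantic import BaseModel, Field

from config import Args
from util import ParamsModel


class Pipeline:
    class Info(BaseModel):
        name: str = "myPipeline"
        title: str = "My Pipeline"
        description: str = "Describe what the pipeline does"
        input_mode: str = "image"  # "image" for webcam input, "text" for prompt-only

    class InputParams(ParamsModel):
        # Fields are rendered as UI controls by the frontend based on `field` and `id`
        prompt: str = Field(
            "a photo of a cat", title="Prompt", field="textarea", id="prompt"
        )
        seed: int = Field(42, min=0, title="Seed", field="seed", id="seed")

    def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
        # Load your diffusers pipeline here and move it to `device`
        ...

    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
        # Run inference for one frame and return a single PIL image
        ...
```

Run it with `python server/main.py --reload --pipeline myPipeline`, replacing the name with your module's file name.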
45 | # LCM
46 | ### Image to Image
47 |
48 | ```bash
49 | python server/main.py --reload --pipeline img2img
50 | ```
51 |
52 | # LCM
53 | ### Text to Image
54 |
55 | ```bash
56 | python server/main.py --reload --pipeline txt2img
57 | ```
58 |
59 | ### Image to Image ControlNet Canny
60 |
61 | ```bash
62 | python server/main.py --reload --pipeline controlnet
63 | ```
64 |
65 |
66 | # LCM + LoRA
67 |
68 | Using LCM-LoRA gives the base model the superpower of running inference in as little as 4 steps (see the sketch below). [Learn more here](https://huggingface.co/blog/lcm_lora) or read the [technical report](https://huggingface.co/papers/2311.05556).
69 |
70 |
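For reference, this is roughly how LCM-LoRA is wired up with diffusers. This is a minimal sketch of the standard diffusers flow; the model ids are the public defaults and not necessarily what every pipeline in this repo uses:

```python
import torch
from diffusers import DiffusionPipeline, LCMScheduler

# Load a Stable Diffusion 1.5 base model and attach the LCM-LoRA adapter
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")

# With the LCM-LoRA, 4 steps and low guidance are usually enough
image = pipe(
    "portrait photo of a cat, cinematic lighting",
    num_inference_steps=4,
    guidance_scale=1.0,
).images[0]
```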
71 | ### Image to Image ControlNet Canny LoRA
72 |
73 | ```bash
74 | python server/main.py --reload --pipeline controlnetLoraSD15
75 | ```
76 | or SDXL; note that SDXL is slower than SD1.5 since inference runs on 1024x1024 images:
77 |
78 | ```bash
79 | python server/main.py --reload --pipeline controlnetLoraSDXL
80 | ```
81 |
82 | ### Text to Image
83 |
84 | ```bash
85 | python server/main.py --reload --pipeline txt2imgLora
86 | ```
87 |
88 | ```bash
89 | python server/main.py --reload --pipeline txt2imgLoraSDXL
90 | ```
91 | # Available Pipelines
92 |
93 | #### [LCM](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7)
94 |
95 | `img2img`
96 | `txt2img`
97 | `controlnet`
98 | `txt2imgLora`
99 | `controlnetLoraSD15`
100 |
101 | #### [SDXL](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
102 | `controlnetLoraSDXL`
103 | `txt2imgLoraSDXL`
104 |
105 | #### [SDXL Turbo](https://huggingface.co/stabilityai/sd-xl-turbo)
106 |
107 | `img2imgSDXLTurbo`
108 | `controlnetSDXLTurbo`
109 |
110 |
111 | #### [SDTurbo](https://huggingface.co/stabilityai/sd-turbo)
112 | `img2imgSDTurbo`
113 | `controlnetSDTurbo`
114 |
115 | #### [Segmind-Vega](https://huggingface.co/segmind/Segmind-Vega)
116 | `controlnetSegmindVegaRT`
117 | `img2imgSegmindVegaRT`
118 |
119 |
120 | ### Setting environment variables
121 |
122 |
123 | * `--host`: Host address (default: 0.0.0.0)
124 | * `--port`: Port number (default: 7860)
125 | * `--reload`: Reload code on change
126 | * `--max-queue-size`: Maximum queue size (optional)
127 | * `--timeout`: Timeout period (optional)
128 | * `--safety-checker`: Enable Safety Checker (optional)
129 | * `--torch-compile`: Use Torch Compile
130 | * `--taesd`: Use Tiny Autoencoder (TAESD)
131 | * `--pipeline`: Pipeline to use (default: "txt2img")
132 | * `--ssl-certfile`: SSL Certificate File (optional)
133 | * `--ssl-keyfile`: SSL Key File (optional)
134 | * `--debug`: Print Inference time
135 | * `--compel`: Enable Compel prompt weighting
136 | * `--sfast`: Enable Stable Fast
137 | * `--onediff`: Enable OneDiff
138 |
139 | If you run with `bash build-run.sh`, you can set the `PIPELINE` variable to choose the pipeline you want to run:
140 |
141 | ```bash
142 | PIPELINE=txt2imgLoraSDXL bash build-run.sh
143 | ```
144 |
145 | You can also set options through environment variables:
146 |
147 | ```bash
148 | TIMEOUT=120 SAFETY_CHECKER=True MAX_QUEUE_SIZE=4 python server/main.py --reload --pipeline txt2imgLoraSDXL
149 | ```
150 |
151 | If you're running locally and want to test it on Mobile Safari, the web server needs to be served over HTTPS; alternatively, follow the instructions in this [comment](https://github.com/radames/Real-Time-Latent-Consistency-Model/issues/17#issuecomment-1811957196):
152 |
153 | ```bash
154 | openssl req -newkey rsa:4096 -nodes -keyout key.pem -x509 -days 365 -out certificate.pem
155 | python server/main.py --reload --ssl-certfile=certificate.pem --ssl-keyfile=key.pem
156 | ```
157 |
158 | ## Docker
159 |
160 | You need the NVIDIA Container Toolkit for Docker. The container defaults to the `controlnet` pipeline.
161 |
162 | ```bash
163 | docker build -t lcm-live .
164 | docker run -ti -p 7860:7860 --gpus all lcm-live
165 | ```
166 |
167 | To reuse model data from the host and avoid downloading it again, mount a Hugging Face cache directory. You can change `~/.cache/huggingface` to any other directory, but if you use `huggingface-cli` locally you can share the same cache:
168 |
169 | ```bash
170 | docker run -ti -p 7860:7860 -e HF_HOME=/data -v ~/.cache/huggingface:/data --gpus all lcm-live
171 | ```
172 |
173 |
174 | or with environment variables
175 |
176 | ```bash
177 | docker run -ti -e PIPELINE=txt2imgLoraSDXL -p 7860:7860 --gpus all lcm-live
178 | ```
179 |
180 |
181 | # Demo on Hugging Face
182 |
183 |
184 | * [radames/Real-Time-Latent-Consistency-Model](https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model)
185 | * [radames/Real-Time-SD-Turbo](https://huggingface.co/spaces/radames/Real-Time-SD-Turbo)
186 | * [latent-consistency/Real-Time-LCM-ControlNet-Lora-SD1.5](https://huggingface.co/spaces/latent-consistency/Real-Time-LCM-ControlNet-Lora-SD1.5)
187 | * [latent-consistency/Real-Time-LCM-Text-to-Image-Lora-SD1.5](https://huggingface.co/spaces/latent-consistency/Real-Time-LCM-Text-to-Image-Lora-SD1.5)
188 | * [radames/Real-Time-Latent-Consistency-Model-Text-To-Image](https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model-Text-To-Image)
189 |
190 |
191 |
192 |
193 | https://github.com/radames/Real-Time-Latent-Consistency-Model/assets/102277/c4003ac5-e7ff-44c0-97d3-464bb659de70
194 |
--------------------------------------------------------------------------------
/build-run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cd frontend
3 | npm install
4 | npm run build
5 | if [ $? -eq 0 ]; then
6 | echo -e "\033[1;32m\nfrontend build success \033[0m"
7 | else
8 | echo -e "\033[1;31m\nfrontend build failed\n\033[0m" >&2; exit 1
9 | fi
10 | cd ../
11 | #check if var PIPELINE is set otherwise get default
12 | if [ -z ${PIPELINE+x} ]; then
13 | PIPELINE="controlnet"
14 | fi
15 | if [ -z ${COMPILE+x} ]; then
16 | COMPILE="--sfast"
17 | fi
18 | echo -e "\033[1;32m\npipeline: $PIPELINE \033[0m"
19 | echo -e "\033[1;32m\ncompile: $COMPILE \033[0m"
20 | python3 ./server/main.py --port 7860 --host 0.0.0.0 --pipeline $PIPELINE $COMPILE
--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 |
3 | # Output
4 | .output
5 | .vercel
6 | .netlify
7 | .wrangler
8 | /.svelte-kit
9 | /build
10 |
11 | # OS
12 | .DS_Store
13 | Thumbs.db
14 |
15 | # Env
16 | .env
17 | .env.*
18 | !.env.example
19 | !.env.test
20 |
21 | # Vite
22 | vite.config.js.timestamp-*
23 | vite.config.ts.timestamp-*
24 | public/
25 |
--------------------------------------------------------------------------------
/frontend/.npmrc:
--------------------------------------------------------------------------------
1 | engine-strict=true
2 |
--------------------------------------------------------------------------------
/frontend/.nvmrc:
--------------------------------------------------------------------------------
1 | v20.14.0
2 |
--------------------------------------------------------------------------------
/frontend/.prettierignore:
--------------------------------------------------------------------------------
1 | # Package Managers
2 | package-lock.json
3 | pnpm-lock.yaml
4 | yarn.lock
5 | bun.lock
6 | bun.lockb
--------------------------------------------------------------------------------
/frontend/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "plugins": ["prettier-plugin-svelte", "prettier-plugin-tailwindcss"],
3 | "overrides": [
4 | {
5 | "files": "*.svelte",
6 | "options": {
7 | "parser": "svelte"
8 | }
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
1 | # sv
2 |
3 | Everything you need to build a Svelte project, powered by [`sv`](https://github.com/sveltejs/cli).
4 |
5 | ## Creating a project
6 |
7 | If you're seeing this, you've probably already done this step. Congrats!
8 |
9 | ```bash
10 | # create a new project in the current directory
11 | npx sv create
12 |
13 | # create a new project in my-app
14 | npx sv create my-app
15 | ```
16 |
17 | ## Developing
18 |
19 | Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:
20 |
21 | ```bash
22 | npm run dev
23 |
24 | # or start the server and open the app in a new browser tab
25 | npm run dev -- --open
26 | ```
27 |
28 | ## Building
29 |
30 | To create a production version of your app:
31 |
32 | ```bash
33 | npm run build
34 | ```
35 |
36 | You can preview the production build with `npm run preview`.
37 |
38 | > To deploy your app, you may need to install an [adapter](https://svelte.dev/docs/kit/adapters) for your target environment.
39 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "frontend",
3 | "private": true,
4 | "version": "0.0.1",
5 | "type": "module",
6 | "scripts": {
7 | "dev": "vite dev",
8 | "build": "vite build",
9 | "preview": "vite preview",
10 | "prepare": "svelte-kit sync || echo ''",
11 | "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
12 | "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
13 | "lint": "eslint . && prettier --check .",
14 | "format": "prettier --write ."
15 | },
16 | "devDependencies": {
17 | "@eslint/compat": "^1.2.5",
18 | "@eslint/js": "^9.26.0",
19 | "@sveltejs/adapter-static": "^3.0.8",
20 | "@sveltejs/kit": "^2.16.0",
21 | "@sveltejs/vite-plugin-svelte": "^5.0.0",
22 | "@tailwindcss/typography": "^0.5.15",
23 | "@tailwindcss/vite": "^4.1.5",
24 | "eslint": "^9.26.0",
25 | "eslint-config-prettier": "^10.0.1",
26 | "eslint-plugin-svelte": "^3.0.0",
27 | "globals": "^16.0.0",
28 | "prettier": "^3.4.2",
29 | "prettier-plugin-svelte": "^3.3.3",
30 | "prettier-plugin-tailwindcss": "^0.6.11",
31 | "svelte": "^5.0.0",
32 | "svelte-check": "^4.0.0",
33 | "tailwindcss": "^4.1.5",
34 | "typescript": "^5.0.0",
35 | "typescript-eslint": "^8.20.0",
36 | "vite": "^6.2.6"
37 | },
38 | "dependencies": {
39 | "piexifjs": "^1.0.6",
40 | "rvfc-polyfill": "^1.0.7"
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/frontend/src/app.css:
--------------------------------------------------------------------------------
1 | @import "tailwindcss";
2 | @plugin '@tailwindcss/typography';
3 |
--------------------------------------------------------------------------------
/frontend/src/app.d.ts:
--------------------------------------------------------------------------------
1 | // See https://svelte.dev/docs/kit/types#app.d.ts
2 | // for information about these interfaces
3 | declare global {
4 | namespace App {
5 | // interface Error {}
6 | // interface Locals {}
7 | // interface PageData {}
8 | // interface PageState {}
9 | // interface Platform {}
10 | }
11 | }
12 |
13 | export {};
14 |
--------------------------------------------------------------------------------
/frontend/src/app.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | %sveltekit.head%
8 |
9 |
10 | %sveltekit.body%
11 |
12 |
13 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/AspectRatioSelect.svelte:
--------------------------------------------------------------------------------
1 |
17 |
18 |
19 |
24 | {#each options as option (option)}
25 | {option}
26 | {/each}
27 |
28 |
29 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/Button.svelte:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 | {@render props.children()}
7 |
8 |
9 |
15 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/Checkbox.svelte:
--------------------------------------------------------------------------------
1 |
11 |
12 |
13 | {params?.title}
14 |
20 |
21 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/ImagePlayer.svelte:
--------------------------------------------------------------------------------
1 |
42 |
43 |
46 | {#if $lcmLiveStatus === LCMLiveStatus.CONNECTING}
47 |
48 |
49 |
52 |
Connecting...
53 |
54 | {:else if isLCMRunning}
55 | {#if !isExpanded}
56 |
57 |
58 |
{
63 | console.error("Image stream error:", e);
64 | // If stream fails to load, set status to error
65 | if ($lcmLiveStatus !== LCMLiveStatus.ERROR) {
66 | lcmLiveStatus.set(LCMLiveStatus.ERROR);
67 | }
68 | }}
69 | />
70 | {/if}
71 |
72 |
77 |
78 |
79 |
85 |
86 |
87 |
88 | {:else if $lcmLiveStatus === LCMLiveStatus.ERROR}
89 |
90 |
93 |
Connection error
94 |
95 | {:else}
96 |
97 |
101 | {/if}
102 |
103 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/InputRange.svelte:
--------------------------------------------------------------------------------
1 |
11 |
12 |
13 | {params?.title}
14 |
24 |
30 |
31 |
52 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/MediaListSwitcher.svelte:
--------------------------------------------------------------------------------
1 |
14 |
15 |
18 |
mediaStreamActions.switchCamera(deviceId, value)}
21 | />
22 | mediaStreamActions.startScreenCapture()}
26 | >
27 | Share
28 |
29 |
30 |
31 | {#if $mediaDevices}
32 | mediaStreamActions.switchCamera(deviceId, aspectRatio)}
35 | id="devices-list"
36 | class="block cursor-pointer rounded-md border border-gray-800/50 bg-slate-100/30 p-1 font-medium text-white"
37 | >
38 | {#each $mediaDevices as device (device.deviceId)}
39 | {device.label}
40 | {/each}
41 |
42 | {/if}
43 |
44 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/PipelineOptions.svelte:
--------------------------------------------------------------------------------
1 |
20 |
21 |
22 |
23 | {#if featuredOptions}
24 | {#each featuredOptions as params (params.id)}
25 | {#if params.field === FieldType.RANGE}
26 |
28 | {:else if params.field === FieldType.SEED}
29 |
31 | {:else if params.field === FieldType.TEXTAREA}
32 |
34 | {:else if params.field === FieldType.CHECKBOX}
35 |
37 | {:else if params.field === FieldType.SELECT}
38 |
40 | {/if}
41 | {/each}
42 | {/if}
43 |
44 |
45 |
46 | Advanced Options
47 |
53 | {#if advanceOptions}
54 | {#each advanceOptions as params (params.id)}
55 | {#if params.field === FieldType.RANGE}
56 |
60 | {:else if params.field === FieldType.SEED}
61 |
65 | {:else if params.field === FieldType.TEXTAREA}
66 |
68 | {:else if params.field === FieldType.CHECKBOX}
69 |
73 | {:else if params.field === FieldType.SELECT}
74 |
78 | {/if}
79 | {/each}
80 | {/if}
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/SeedInput.svelte:
--------------------------------------------------------------------------------
1 |
15 |
16 |
17 | Seed
18 |
25 | Rand
26 |
27 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/Selectlist.svelte:
--------------------------------------------------------------------------------
1 |
11 |
12 |
13 | {params?.title}
14 | {#if params?.values}
15 |
20 | {#each params.values as model, i (model)}
21 | {model}
22 | {/each}
23 |
24 | {/if}
25 |
26 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/TextArea.svelte:
--------------------------------------------------------------------------------
1 |
11 |
12 |
13 |
14 | {params?.title}
15 |
16 |
17 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/VideoInput.svelte:
--------------------------------------------------------------------------------
1 |
108 |
109 |
112 |
115 | {#if $mediaDevices.length > 0}
116 |
117 |
118 |
123 |
124 |
125 |
126 | {/if}
127 |
{
131 | videoIsReady = true;
132 | }}
133 | playsinline
134 | autoplay
135 | muted
136 | loop
137 | >
138 |
142 |
143 |
157 |
158 |
--------------------------------------------------------------------------------
/frontend/src/lib/components/Warning.svelte:
--------------------------------------------------------------------------------
1 |
15 |
16 | {#if message}
17 |
18 |
(message = "")}
22 | onkeydown={(e) => e.key === "Enter" && (message = "")}
23 | >
24 |
25 | {message}
26 |
27 |
28 |
29 |
30 | {/if}
31 |
32 |
34 |
--------------------------------------------------------------------------------
/frontend/src/lib/icons/aspect.svelte:
--------------------------------------------------------------------------------
1 |
4 |
5 |
11 |
15 |
16 |
--------------------------------------------------------------------------------
/frontend/src/lib/icons/expand.svelte:
--------------------------------------------------------------------------------
1 |
4 |
5 |
11 |
15 |
16 |
--------------------------------------------------------------------------------
/frontend/src/lib/icons/floppy.svelte:
--------------------------------------------------------------------------------
1 |
4 |
5 |
11 |
15 |
16 |
--------------------------------------------------------------------------------
/frontend/src/lib/icons/screen.svelte:
--------------------------------------------------------------------------------
1 |
4 |
5 |
11 |
15 |
16 |
--------------------------------------------------------------------------------
/frontend/src/lib/icons/spinner.svelte:
--------------------------------------------------------------------------------
1 |
4 |
5 |
11 |
15 |
16 |
--------------------------------------------------------------------------------
/frontend/src/lib/index.ts:
--------------------------------------------------------------------------------
1 | // place files you want to import through the `$lib` alias in this folder.
2 |
--------------------------------------------------------------------------------
/frontend/src/lib/lcmLive.ts:
--------------------------------------------------------------------------------
1 | import { get, writable } from "svelte/store";
2 |
3 | export enum LCMLiveStatus {
4 | CONNECTED = "connected",
5 | DISCONNECTED = "disconnected",
6 | CONNECTING = "connecting",
7 | WAIT = "wait",
8 | SEND_FRAME = "send_frame",
9 | TIMEOUT = "timeout",
10 | ERROR = "error",
11 | }
12 |
13 | const initStatus: LCMLiveStatus = LCMLiveStatus.DISCONNECTED;
14 |
15 | export const lcmLiveStatus = writable<LCMLiveStatus>(initStatus);
16 | export const streamId = writable<string | null>(null);
17 |
18 | // WebSocket connection
19 | let websocket: WebSocket | null;
20 |
21 | // Register browser unload event listener to properly close WebSockets
22 | if (typeof window !== "undefined") {
23 | window.addEventListener("beforeunload", () => {
24 | // Close the WebSocket properly if it exists
25 | if (websocket && websocket.readyState === WebSocket.OPEN) {
26 | websocket.close(1000, "Page unload");
27 | }
28 | });
29 | }
30 | export const lcmLiveActions = {
31 | async start(
32 | getSreamdata: () =>
33 | | [Record<string, unknown>]
34 | | [Record<string, unknown>, Blob],
35 | ) {
36 | return new Promise((resolve, reject) => {
37 | try {
38 | // Set connecting status immediately
39 | lcmLiveStatus.set(LCMLiveStatus.CONNECTING);
40 |
41 | const userId = crypto.randomUUID();
42 | const websocketURL = `${
43 | window.location.protocol === "https:" ? "wss" : "ws"
44 | }:${window.location.host}/api/ws/${userId}`;
45 |
46 | // Close any existing connection first
47 | if (websocket && websocket.readyState !== WebSocket.CLOSED) {
48 | websocket.close();
49 | }
50 |
51 | websocket = new WebSocket(websocketURL);
52 |
53 | // Set a connection timeout
54 | const connectionTimeout = setTimeout(() => {
55 | if (websocket && websocket.readyState !== WebSocket.OPEN) {
56 | console.error("WebSocket connection timeout");
57 | lcmLiveStatus.set(LCMLiveStatus.ERROR);
58 | streamId.set(null);
59 | reject(new Error("Connection timeout. Please try again."));
60 | websocket.close();
61 | }
62 | }, 10000); // 10 second timeout
63 |
64 | websocket.onopen = () => {
65 | clearTimeout(connectionTimeout);
66 | console.log("Connected to websocket");
67 | };
68 |
69 | websocket.onclose = (event) => {
70 | clearTimeout(connectionTimeout);
71 | console.log(
72 | `Disconnected from websocket: ${event.code} ${event.reason}`,
73 | );
74 |
75 | // Only change status if we're not in ERROR state (which would mean we already handled the error)
76 | if (get(lcmLiveStatus) !== LCMLiveStatus.ERROR) {
77 | lcmLiveStatus.set(LCMLiveStatus.DISCONNECTED);
78 | }
79 |
80 | // If connection was never established (close without open)
81 | if (event.code === 1006 && get(streamId) === null) {
82 | reject(
83 | new Error("Cannot connect to server. Please try again later."),
84 | );
85 | }
86 | };
87 |
88 | websocket.onerror = (err) => {
89 | clearTimeout(connectionTimeout);
90 | console.error("WebSocket error:", err);
91 | lcmLiveStatus.set(LCMLiveStatus.ERROR);
92 | streamId.set(null);
93 | reject(new Error("Connection error. Please try again."));
94 | };
95 |
96 | websocket.onmessage = (event) => {
97 | try {
98 | const data = JSON.parse(event.data);
99 | switch (data.status) {
100 | case "connected":
101 | lcmLiveStatus.set(LCMLiveStatus.CONNECTED);
102 | streamId.set(userId);
103 | resolve({ status: "connected", userId });
104 | break;
105 | case "send_frame":
106 | lcmLiveStatus.set(LCMLiveStatus.SEND_FRAME);
107 | try {
108 | const streamData = getSreamdata();
109 | // Send as an object, not a string, to use the proper handling in the send method
110 | this.send({ status: "next_frame" });
111 | for (const d of streamData) {
112 | this.send(d);
113 | }
114 | } catch (error) {
115 | console.error("Error sending frame data:", error);
116 | }
117 | break;
118 | case "wait":
119 | lcmLiveStatus.set(LCMLiveStatus.WAIT);
120 | break;
121 | case "timeout":
122 | console.log("Session timeout");
123 | lcmLiveStatus.set(LCMLiveStatus.TIMEOUT);
124 | streamId.set(null);
125 | reject(new Error("Session timeout. Please restart."));
126 | break;
127 | case "error":
128 | console.error("Server error:", data.message);
129 | lcmLiveStatus.set(LCMLiveStatus.ERROR);
130 | streamId.set(null);
131 | reject(new Error(data.message || "Server error occurred"));
132 | break;
133 | default:
134 | console.log("Unknown message status:", data.status);
135 | }
136 | } catch (error) {
137 | console.error("Error handling websocket message:", error);
138 | }
139 | };
140 | } catch (err) {
141 | console.error("Error initializing websocket:", err);
142 | lcmLiveStatus.set(LCMLiveStatus.ERROR);
143 | streamId.set(null);
144 | reject(err);
145 | }
146 | });
147 | },
148 | send(data: Blob | Record<string, unknown>) {
149 | try {
150 | if (websocket && websocket.readyState === WebSocket.OPEN) {
151 | if (data instanceof Blob) {
152 | websocket.send(data);
153 | } else {
154 | websocket.send(JSON.stringify(data));
155 | }
156 | } else {
157 | const readyStateText = websocket
158 | ? ["CONNECTING", "OPEN", "CLOSING", "CLOSED"][websocket.readyState]
159 | : "null";
160 | console.warn(`WebSocket not ready for sending: ${readyStateText}`);
161 |
162 | // If WebSocket is closed unexpectedly, set status to disconnected
163 | if (!websocket || websocket.readyState === WebSocket.CLOSED) {
164 | lcmLiveStatus.set(LCMLiveStatus.DISCONNECTED);
165 | streamId.set(null);
166 | }
167 | }
168 | } catch (error) {
169 | console.error("Error sending data through WebSocket:", error);
170 | // Handle WebSocket error by forcing disconnection
171 | this.stop();
172 | }
173 | },
174 |
175 | async reconnect(
176 | getSreamdata: () =>
177 | | [Record<string, unknown>]
178 | | [Record<string, unknown>, Blob],
179 | ) {
180 | try {
181 | await this.stop();
182 | // Small delay to ensure clean disconnection before reconnecting
183 | await new Promise((resolve) => setTimeout(resolve, 500));
184 | return await this.start(getSreamdata);
185 | } catch (error) {
186 | console.error("Reconnection failed:", error);
187 | throw error;
188 | }
189 | },
190 |
191 | async stop() {
192 | lcmLiveStatus.set(LCMLiveStatus.DISCONNECTED);
193 | try {
194 | if (websocket) {
195 | // Only attempt to close if not already closed
196 | if (websocket.readyState !== WebSocket.CLOSED) {
197 | // Set up onclose handler to clean up only
198 | websocket.onclose = () => {
199 | console.log("WebSocket closed cleanly during stop()");
200 | };
201 |
202 | // Set up onerror to be silent during intentional closure
203 | websocket.onerror = () => {};
204 |
205 | websocket.close(1000, "Client initiated disconnect");
206 | }
207 | }
208 | } catch (error) {
209 | console.error("Error during WebSocket closure:", error);
210 | } finally {
211 | // Always clean up references
212 | websocket = null;
213 | streamId.set(null);
214 | }
215 | },
216 | };
217 |
--------------------------------------------------------------------------------
/frontend/src/lib/mediaStream.ts:
--------------------------------------------------------------------------------
1 | import { get, writable, type Writable } from "svelte/store";
2 |
3 | const BASE_HEIGHT = 720;
4 | export enum MediaStreamStatusEnum {
5 | INIT = "init",
6 | CONNECTED = "connected",
7 | DISCONNECTED = "disconnected",
8 | }
9 | export const onFrameChangeStore: Writable<{ blob: Blob }> = writable({
10 | blob: new Blob(),
11 | });
12 |
13 | export const mediaDevices = writable<MediaDeviceInfo[]>([]);
14 | export const mediaStreamStatus = writable(MediaStreamStatusEnum.INIT);
15 | export const mediaStream = writable<MediaStream | null>(null);
16 |
17 | export const mediaStreamActions = {
18 | async enumerateDevices() {
19 | // console.log("Enumerating devices");
20 | await navigator.mediaDevices
21 | .enumerateDevices()
22 | .then((devices) => {
23 | const cameras = devices.filter(
24 | (device) => device.kind === "videoinput",
25 | );
26 | mediaDevices.set(cameras);
27 | })
28 | .catch((err) => {
29 | console.error(err);
30 | });
31 | },
32 | async start(mediaDevicedID?: string, aspectRatio: number = 1) {
33 | const constraints = {
34 | audio: false,
35 | video: {
36 | width: {
37 | ideal: BASE_HEIGHT * aspectRatio,
38 | },
39 | height: {
40 | ideal: BASE_HEIGHT,
41 | },
42 | deviceId: mediaDevicedID,
43 | },
44 | };
45 |
46 | await navigator.mediaDevices
47 | .getUserMedia(constraints)
48 | .then((stream) => {
49 | mediaStreamStatus.set(MediaStreamStatusEnum.CONNECTED);
50 | mediaStream.set(stream);
51 | })
52 | .catch((err) => {
53 | console.error(`${err.name}: ${err.message}`);
54 | mediaStreamStatus.set(MediaStreamStatusEnum.DISCONNECTED);
55 | mediaStream.set(null);
56 | });
57 | },
58 | async startScreenCapture() {
59 | const displayMediaOptions = {
60 | video: {
61 | displaySurface: "window",
62 | },
63 | audio: false,
64 | surfaceSwitching: "include",
65 | };
66 |
67 | let captureStream = null;
68 |
69 | try {
70 | captureStream =
71 | await navigator.mediaDevices.getDisplayMedia(displayMediaOptions);
72 | const videoTrack = captureStream.getVideoTracks()[0];
73 |
74 | console.log("Track settings:");
75 | console.log(JSON.stringify(videoTrack.getSettings(), null, 2));
76 | console.log("Track constraints:");
77 | console.log(JSON.stringify(videoTrack.getConstraints(), null, 2));
78 | mediaStreamStatus.set(MediaStreamStatusEnum.CONNECTED);
79 | mediaStream.set(captureStream);
80 |
81 | const capabilities = videoTrack.getCapabilities();
82 | const aspectRatio = capabilities.aspectRatio;
83 | console.log("Aspect Ratio Constraints:", aspectRatio);
84 | } catch (err) {
85 | console.error(err);
86 | }
87 | },
88 | async switchCamera(mediaDevicedID: string, aspectRatio: number) {
89 | console.log("Switching camera");
90 | if (get(mediaStreamStatus) !== MediaStreamStatusEnum.CONNECTED) {
91 | return;
92 | }
93 | const constraints = {
94 | audio: false,
95 | video: {
96 | width: {
97 | ideal: BASE_HEIGHT * aspectRatio,
98 | },
99 | height: {
100 | ideal: BASE_HEIGHT,
101 | },
102 | deviceId: mediaDevicedID,
103 | },
104 | };
105 | console.log("Switching camera", constraints);
106 | await navigator.mediaDevices
107 | .getUserMedia(constraints)
108 | .then((stream) => {
109 | mediaStreamStatus.set(MediaStreamStatusEnum.CONNECTED);
110 | mediaStream.set(stream);
111 | })
112 | .catch((err) => {
113 | console.error(`${err.name}: ${err.message}`);
114 | });
115 | },
116 | async stop() {
117 | navigator.mediaDevices.getUserMedia({ video: true }).then((stream) => {
118 | stream.getTracks().forEach((track) => track.stop());
119 | });
120 | mediaStreamStatus.set(MediaStreamStatusEnum.DISCONNECTED);
121 | mediaStream.set(null);
122 | },
123 | };
124 |
--------------------------------------------------------------------------------
/frontend/src/lib/store.ts:
--------------------------------------------------------------------------------
1 | import {
2 | derived,
3 | get,
4 | writable,
5 | type Readable,
6 | type Writable,
7 | } from "svelte/store";
8 |
9 | export type PipelineValues = Record<string, unknown>;
10 |
11 | export const pipelineValues: Writable<PipelineValues> = writable({});
12 | export const deboucedPipelineValues: Readable<PipelineValues> = derived(
13 | pipelineValues,
14 | ($pipelineValues, set) => {
15 | const debounced = setTimeout(() => {
16 | set($pipelineValues);
17 | }, 100);
18 | return () => clearTimeout(debounced);
19 | },
20 | );
21 |
22 | export const getPipelineValues = () => get(pipelineValues);
23 |
--------------------------------------------------------------------------------
/frontend/src/lib/types.ts:
--------------------------------------------------------------------------------
1 | export const enum FieldType {
2 | RANGE = "range",
3 | SEED = "seed",
4 | TEXTAREA = "textarea",
5 | CHECKBOX = "checkbox",
6 | SELECT = "select",
7 | }
8 | export const enum PipelineMode {
9 | IMAGE = "image",
10 | VIDEO = "video",
11 | TEXT = "text",
12 | }
13 |
14 | export interface Fields {
15 | [key: string]: FieldProps;
16 | }
17 |
18 | export interface FieldProps {
19 | default: number | string;
20 | max?: number;
21 | min?: number;
22 | title: string;
23 | field: FieldType;
24 | step?: number;
25 | disabled?: boolean;
26 | hide?: boolean;
27 | id: string;
28 | values?: string[];
29 | }
30 | export interface PipelineInfo {
31 | title: {
32 | default: string;
33 | };
34 | name: string;
35 | description: string;
36 | input_mode: {
37 | default: PipelineMode;
38 | };
39 | }
40 |
--------------------------------------------------------------------------------
/frontend/src/lib/utils.ts:
--------------------------------------------------------------------------------
1 | import * as piexif from "piexifjs";
2 |
3 | export interface IImageInfo {
4 | prompt?: string;
5 | negative_prompt?: string;
6 | seed?: number;
7 | guidance_scale?: number;
8 | }
9 |
10 | export enum windowType {
11 | image = "image",
12 | }
13 |
14 | export function snapImage(imageEl: HTMLImageElement, info: IImageInfo) {
15 | try {
16 | const zeroth: { [key: string]: string | number } = {};
17 | const exif: { [key: string]: string | number } = {};
18 | const gps: { [key: string]: string | number } = {};
19 | zeroth[piexif.ImageIFD.Make] = "LCM Image-to-Image ControlNet";
20 | zeroth[piexif.ImageIFD.ImageDescription] =
21 | `prompt: ${info?.prompt} | negative_prompt: ${info?.negative_prompt} | seed: ${info?.seed} | guidance_scale: ${info?.guidance_scale}`;
22 | zeroth[piexif.ImageIFD.Software] =
23 | "https://github.com/radames/Real-Time-Latent-Consistency-Model";
24 | exif[piexif.ExifIFD.DateTimeOriginal] = new Date().toISOString();
25 |
26 | const exifObj = { "0th": zeroth, Exif: exif, GPS: gps };
27 | const exifBytes = piexif.dump(exifObj);
28 |
29 | const canvas = document.createElement("canvas");
30 | canvas.width = imageEl.naturalWidth;
31 | canvas.height = imageEl.naturalHeight;
32 | const ctx = canvas.getContext("2d") as CanvasRenderingContext2D;
33 | ctx.drawImage(imageEl, 0, 0);
34 | const dataURL = canvas.toDataURL("image/jpeg");
35 | const withExif = piexif.insert(exifBytes, dataURL);
36 |
37 | const a = document.createElement("a");
38 | a.href = withExif;
39 | a.download = `lcm_txt_2_img${Date.now()}.jpg`;
40 | a.click();
41 | } catch (err) {
42 | console.log(err);
43 | }
44 | }
45 |
46 | export function expandWindow(streamURL: string) {
47 | const newWindow = window.open(
48 | "",
49 | "_blank",
50 | "width=1024,height=1024,scrollbars=0,resizable=1,toolbar=0,menubar=0,location=0,directories=0,status=0",
51 | ) as Window;
52 |
53 | const html = `
54 |
55 |
56 | Real-Time Latent Consistency Model
57 |
64 |
65 |
66 |
85 |
86 |
87 | `;
88 | newWindow.document.write(html);
89 |
90 | const img = newWindow.document.createElement("img");
91 | img.src = streamURL;
92 | img.style.width = "100%";
93 | img.style.height = "100%";
94 | img.style.objectFit = "contain";
95 | newWindow.document.body.appendChild(img);
96 |
97 | return newWindow;
98 | }
99 |
--------------------------------------------------------------------------------
/frontend/src/piexifjs.d.ts:
--------------------------------------------------------------------------------
1 | declare module "piexifjs" {
2 | export const ImageIFD: {
3 | Make: number;
4 | ImageDescription: number;
5 | Software: number;
6 | };
7 | export const ExifIFD: {
8 | DateTimeOriginal: number;
9 | };
10 | export function dump(exifObj: Record<string, Record<string, string | number>>): string;
11 | export function insert(exifBytes: string, dataURL: string): string;
12 | }
13 |
--------------------------------------------------------------------------------
/frontend/src/routes/+layout.svelte:
--------------------------------------------------------------------------------
1 |
6 |
7 | {@render children()}
8 |
--------------------------------------------------------------------------------
/frontend/src/routes/+page.svelte:
--------------------------------------------------------------------------------
1 |
146 |
147 |
148 |
151 |
152 |
153 |
154 |
155 |
156 | {#if pageContent}
157 |
158 | {@html pageContent}
159 | {/if}
160 | {#if maxQueueSize > 0}
161 |
162 | There are {currentQueueSize}
165 | user(s) sharing the same GPU, affecting real-time performance. Maximum queue
166 | size is {maxQueueSize}.
167 | Duplicate and run it on your own GPU.
172 |
173 | {/if}
174 |
175 | {#if $lcmLiveStatus === LCMLiveStatus.ERROR}
176 |
177 |
182 | Try reconnecting
183 |
184 |
185 | {/if}
186 |
187 | {#if pipelineParams}
188 |
189 | {#if isImageMode}
190 |
191 |
195 |
196 | {/if}
197 |
198 |
199 |
200 |
201 |
202 | {#if isConnecting}
203 | Connecting...
204 | {:else if isLCMRunning}
205 | Stop
206 | {:else}
207 | Start
208 | {/if}
209 |
210 |
211 |
212 |
213 | {:else}
214 |
215 |
216 |
217 |
Loading...
218 |
219 | {/if}
220 |
221 |
222 |
228 |
--------------------------------------------------------------------------------
/frontend/src/routes/+page.ts:
--------------------------------------------------------------------------------
1 | export const prerender = true;
2 |
--------------------------------------------------------------------------------
/frontend/static/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/frontend/static/favicon.png
--------------------------------------------------------------------------------
/frontend/svelte.config.js:
--------------------------------------------------------------------------------
1 | import adapter from "@sveltejs/adapter-static";
2 | import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
3 |
4 | const config = {
5 | preprocess: vitePreprocess(),
6 | kit: {
7 | adapter: adapter({
8 | pages: "public",
9 | assets: "public",
10 | fallback: undefined,
11 | precompress: false,
12 | strict: true,
13 | }),
14 | },
15 | };
16 |
17 | export default config;
18 |
--------------------------------------------------------------------------------
/frontend/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | export default {
3 | content: ["./src/**/*.{html,js,svelte,ts}"],
4 | theme: {
5 | extend: {},
6 | },
7 | plugins: [import("@tailwindcss/typography")],
8 | };
9 |
--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "./.svelte-kit/tsconfig.json",
3 | "compilerOptions": {
4 | "allowJs": true,
5 | "checkJs": true,
6 | "esModuleInterop": true,
7 | "forceConsistentCasingInFileNames": true,
8 | "resolveJsonModule": true,
9 | "skipLibCheck": true,
10 | "sourceMap": true,
11 | "strict": true,
12 | "moduleResolution": "bundler"
13 | }
14 | // Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias
15 | // except $lib which is handled by https://svelte.dev/docs/kit/configuration#files
16 | //
17 | // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes
18 | // from the referenced tsconfig.json - TypeScript does not merge them in
19 | }
20 |
--------------------------------------------------------------------------------
/frontend/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { sveltekit } from "@sveltejs/kit/vite";
2 | import tailwindcss from "@tailwindcss/vite";
3 | import { defineConfig } from "vite";
4 |
5 | export default defineConfig({
6 | plugins: [tailwindcss(), sveltekit()],
7 | server: {
8 | proxy: {
9 | "/api": "http://localhost:7860",
10 | "/api/ws": {
11 | target: "ws://localhost:7860",
12 | ws: true,
13 | },
14 | },
15 | },
16 | });
17 |
--------------------------------------------------------------------------------
/qr-code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/qr-code.png
--------------------------------------------------------------------------------
/server/config.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel, field_validator
2 | import argparse
3 | import os
4 | from typing import Annotated
5 |
6 |
7 | class Args(BaseModel):
8 | host: str
9 | port: int
10 | reload: bool
11 | max_queue_size: int
12 | timeout: float
13 | safety_checker: bool
14 | torch_compile: bool
15 | taesd: bool
16 | pipeline: str
17 | ssl_certfile: str | None
18 | ssl_keyfile: str | None
19 | sfast: bool
20 | onediff: bool = False
21 | compel: bool = False
22 | debug: bool = False
23 | pruna: bool = False
24 |
25 | def pretty_print(self) -> None:
26 | print("\n")
27 | for field, value in self.model_dump().items():
28 | print(f"{field}: {value}")
29 | print("\n")
30 |
31 | @field_validator("ssl_keyfile")
32 | @classmethod
33 | def validate_ssl_keyfile(cls, v: str | None, info) -> str | None:
34 | """Validate that if ssl_certfile is provided, ssl_keyfile is also provided."""
35 | ssl_certfile = info.data.get("ssl_certfile")
36 | if ssl_certfile and not v:
37 | raise ValueError(
38 | "If ssl_certfile is provided, ssl_keyfile must also be provided"
39 | )
40 | return v
41 |
42 |
43 | MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
44 | TIMEOUT = float(os.environ.get("TIMEOUT", 0))
45 | SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None) == "True"
46 | TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None) == "True"
47 | USE_TAESD = os.environ.get("USE_TAESD", "False") == "True"
48 | default_host = os.getenv("HOST", "0.0.0.0")
49 | default_port = int(os.getenv("PORT", "7860"))
50 |
51 | parser = argparse.ArgumentParser(description="Run the app")
52 | parser.add_argument("--host", type=str, default=default_host, help="Host address")
53 | parser.add_argument("--port", type=int, default=default_port, help="Port number")
54 | parser.add_argument("--reload", action="store_true", help="Reload code on change")
55 | parser.add_argument(
56 | "--max-queue-size",
57 | dest="max_queue_size",
58 | type=int,
59 | default=MAX_QUEUE_SIZE,
60 | help="Max Queue Size",
61 | )
62 | parser.add_argument("--timeout", type=float, default=TIMEOUT, help="Timeout")
63 | parser.add_argument(
64 | "--safety-checker",
65 | dest="safety_checker",
66 | action="store_true",
67 | default=SAFETY_CHECKER,
68 | help="Safety Checker",
69 | )
70 | parser.add_argument(
71 | "--torch-compile",
72 | dest="torch_compile",
73 | action="store_true",
74 | default=TORCH_COMPILE,
75 | help="Torch Compile",
76 | )
77 | parser.add_argument(
78 | "--taesd",
79 | dest="taesd",
80 | action="store_true",
81 | help="Use Tiny Autoencoder",
82 | )
83 | parser.add_argument(
84 | "--pipeline",
85 | type=str,
86 | default="txt2img",
87 | help="Pipeline to use",
88 | )
89 | parser.add_argument(
90 | "--ssl-certfile",
91 | dest="ssl_certfile",
92 | type=str,
93 | default=None,
94 | help="SSL certfile",
95 | )
96 | parser.add_argument(
97 | "--ssl-keyfile",
98 | dest="ssl_keyfile",
99 | type=str,
100 | default=None,
101 | help="SSL keyfile",
102 | )
103 | parser.add_argument(
104 | "--debug",
105 | action="store_true",
106 | default=False,
107 | help="Debug",
108 | )
109 | parser.add_argument(
110 | "--compel",
111 | action="store_true",
112 | default=False,
113 | help="Compel",
114 | )
115 | parser.add_argument(
116 | "--sfast",
117 | action="store_true",
118 | default=False,
119 | help="Enable Stable Fast",
120 | )
121 | parser.add_argument(
122 | "--onediff",
123 | action="store_true",
124 | default=False,
125 | help="Enable OneDiff",
126 | )
127 | parser.add_argument(
128 | "--pruna",
129 | action="store_true",
130 | default=False,
131 | help="Enable Pruna",
132 | )
133 | parser.set_defaults(taesd=USE_TAESD)
134 |
135 | config = Args.model_validate(vars(parser.parse_args()))
136 | config.pretty_print()
137 |
--------------------------------------------------------------------------------
/server/device.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | # Check if MPS is available (macOS with Apple Silicon M1/M2/M3 chips)
4 | mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
5 | xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()
6 | device = torch.device(
7 | "cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu"
8 | )
9 | torch_dtype = torch.float16
10 | if mps_available:
11 | device = torch.device("mps")
12 | torch_dtype = torch.float32
13 |
--------------------------------------------------------------------------------
/server/pipelines/IPcompositionHyperSD15.py:
--------------------------------------------------------------------------------
1 | from diffusers import (
2 | DiffusionPipeline,
3 | TCDScheduler,
4 | )
5 | from compel import Compel
6 | import torch
7 | from transformers import CLIPVisionModelWithProjection
8 | from huggingface_hub import hf_hub_download
9 |
10 | try:
11 | import intel_extension_for_pytorch as ipex # type: ignore
12 | except ImportError:
13 | pass
14 |
15 | from config import Args
16 | from pydantic import BaseModel, Field
17 | from util import ParamsModel
18 | from PIL import Image
19 |
20 | model_id = "runwayml/stable-diffusion-v1-5"
21 | ip_adapter_model = "ostris/ip-composition-adapter"
22 | file_name = "ip_plus_composition_sd15.safetensors"
23 |
24 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
25 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
26 | page_content = """
27 | Hyper-SD Unified + IP Adapter Composition
28 | Image-to-Image ControlNet
29 |
30 | """
31 |
32 |
33 | class Pipeline:
34 | class Info(BaseModel):
35 | name: str = "controlnet+SDXL+Turbo"
36 | title: str = "SDXL Turbo + Controlnet"
37 | description: str = "Generates an image from a text prompt"
38 | input_mode: str = "image"
39 | page_content: str = page_content
40 |
41 | class InputParams(ParamsModel):
42 | prompt: str = Field(
43 | default_prompt,
44 | title="Prompt",
45 | field="textarea",
46 | id="prompt",
47 | )
48 | negative_prompt: str = Field(
49 | default_negative_prompt,
50 | title="Negative Prompt",
51 | field="textarea",
52 | id="negative_prompt",
53 | hide=True,
54 | )
55 | seed: int = Field(
56 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
57 | )
58 | steps: int = Field(
59 | 2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
60 | )
61 | width: int = Field(
62 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
63 | )
64 | height: int = Field(
65 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
66 | )
67 | guidance_scale: float = Field(
68 | 0.0,
69 | min=0,
70 | max=10,
71 | step=0.001,
72 | title="Guidance Scale",
73 | field="range",
74 | hide=True,
75 | id="guidance_scale",
76 | )
77 | ip_adapter_scale: float = Field(
78 | 0.8,
79 | min=0.0,
80 | max=1.0,
81 | step=0.001,
82 | title="IP Adapter Scale",
83 | field="range",
84 | hide=True,
85 | id="ip_adapter_scale",
86 | )
87 | eta: float = Field(
88 | 1.0,
89 | min=0,
90 | max=1.0,
91 | step=0.001,
92 | title="Eta",
93 | field="range",
94 | hide=True,
95 | id="eta",
96 | )
97 |
98 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
99 | image_encoder = CLIPVisionModelWithProjection.from_pretrained(
100 | "h94/IP-Adapter",
101 | subfolder="models/image_encoder",
102 | torch_dtype=torch.float16,
103 | ).to(device)
104 |
105 | self.pipe = DiffusionPipeline.from_pretrained(
106 | model_id,
107 | safety_checker=None,
108 | torch_dtype=torch_dtype,
109 | image_encoder=image_encoder,
110 | variant="fp16",
111 | )
112 |
113 | self.pipe.load_ip_adapter(
114 | ip_adapter_model,
115 | subfolder="",
116 | weight_name=[file_name],
117 | image_encoder_folder=None,
118 | )
119 |
120 | self.pipe.load_lora_weights(
121 | hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-1step-lora.safetensors")
122 | )
123 | self.pipe.fuse_lora()
124 |
125 | self.pipe.scheduler = TCDScheduler.from_config(self.pipe.scheduler.config)
126 | self.pipe.set_ip_adapter_scale([0.8])
127 |
128 | # if args.compile:
129 | # pipe.unet = oneflow_compile(pipe.unet, options=compile_options)
130 | # pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=compile_options)
131 |
132 | if args.sfast:
133 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
134 | compile,
135 | CompilationConfig,
136 | )
137 |
138 | config = CompilationConfig.Default()
139 | # config.enable_xformers = True
140 | config.enable_triton = True
141 | config.enable_cuda_graph = True
142 | # cofig.
143 | self.pipe = compile(self.pipe, config=config)
144 |
145 | self.pipe.set_progress_bar_config(disable=True)
146 | self.pipe.to(device=device)
147 | if device.type != "mps":
148 | self.pipe.unet.to(memory_format=torch.channels_last)
149 |
150 | if args.compel:
151 | self.compel_proc = Compel(
152 | tokenizer=self.pipe.tokenizer,
153 | text_encoder=self.pipe.text_encoder,
154 | truncate_long_prompts=False,
155 | )
156 |
157 | if args.torch_compile:
158 | self.pipe.unet = torch.compile(
159 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
160 | )
161 | self.pipe.vae = torch.compile(
162 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
163 | )
164 | self.pipe(
165 | prompt="warmup",
166 | image=[Image.new("RGB", (768, 768))],
167 | )
168 |
169 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
170 | generator = torch.manual_seed(params.seed)
171 | self.pipe.set_ip_adapter_scale([params.ip_adapter_scale])
172 |
173 | prompt_embeds = None
174 | prompt = params.prompt
175 | if hasattr(self, "compel_proc"):
176 | prompt_embeds = self.compel_proc(prompt)
177 | prompt = None
178 |
179 | steps = params.steps
180 |
181 | results = self.pipe(
182 | prompt=prompt,
183 | prompt_embeds=prompt_embeds,
184 | generator=generator,
185 | num_inference_steps=steps,
186 | guidance_scale=params.guidance_scale,
187 | width=params.width,
188 | eta=params.eta,
189 | height=params.height,
190 | ip_adapter_image=[params.image],
191 | output_type="pil",
192 | )
193 |
194 | return results.images[0]
195 |
--------------------------------------------------------------------------------
/server/pipelines/IPcompositionHyperSDXL.py:
--------------------------------------------------------------------------------
1 | from diffusers import (
2 | StableDiffusionXLPipeline,
3 | AutoencoderKL,
4 | TCDScheduler,
5 | )
6 | from compel import Compel, ReturnedEmbeddingsType
7 | import torch
8 | from transformers import CLIPVisionModelWithProjection
9 | from huggingface_hub import hf_hub_download
10 |
11 | try:
12 | import intel_extension_for_pytorch as ipex # type: ignore
13 | except:
14 | pass
15 |
16 | from config import Args
17 | from pydantic import BaseModel, Field
18 | from util import ParamsModel
19 | from PIL import Image
20 |
21 | model_id = "stabilityai/stable-diffusion-xl-base-1.0"
22 | taesd_model = "madebyollin/taesdxl"
23 | ip_adapter_model = "ostris/ip-composition-adapter"
24 | file_name = "ip_plus_composition_sdxl.safetensors"
25 |
26 | default_prompt = "Portrait of The Terminator, glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
27 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
28 | page_content = """
29 | Hyper-SDXL Unified + IP Adapter Composition
30 | Image-to-Image ControlNet
31 |
32 | """
33 |
34 |
35 | class Pipeline:
36 | class Info(BaseModel):
37 | name: str = "controlnet+SDXL+Turbo"
38 |         title: str = "Hyper-SDXL + IP Adapter Composition"
39 | description: str = "Generates an image from a text prompt"
40 | input_mode: str = "image"
41 | page_content: str = page_content
42 |
43 | class InputParams(ParamsModel):
44 | prompt: str = Field(
45 | default_prompt,
46 | title="Prompt",
47 | field="textarea",
48 | id="prompt",
49 | )
50 | negative_prompt: str = Field(
51 | default_negative_prompt,
52 | title="Negative Prompt",
53 | field="textarea",
54 | id="negative_prompt",
55 | hide=True,
56 | )
57 | seed: int = Field(
58 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
59 | )
60 | steps: int = Field(
61 | 2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
62 | )
63 | width: int = Field(
64 | 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
65 | )
66 | height: int = Field(
67 | 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
68 | )
69 | guidance_scale: float = Field(
70 | 0.0,
71 | min=0,
72 | max=10,
73 | step=0.001,
74 | title="Guidance Scale",
75 | field="range",
76 | hide=True,
77 | id="guidance_scale",
78 | )
79 | ip_adapter_scale: float = Field(
80 | 0.8,
81 | min=0.0,
82 | max=1.0,
83 | step=0.001,
84 | title="IP Adapter Scale",
85 | field="range",
86 | hide=True,
87 | id="ip_adapter_scale",
88 | )
89 | eta: float = Field(
90 | 1.0,
91 | min=0,
92 | max=1.0,
93 | step=0.001,
94 | title="Eta",
95 | field="range",
96 | hide=True,
97 | id="eta",
98 | )
99 |
100 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
101 | vae = AutoencoderKL.from_pretrained(
102 | "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
103 | )
104 | image_encoder = CLIPVisionModelWithProjection.from_pretrained(
105 | "h94/IP-Adapter",
106 | subfolder="models/image_encoder",
107 | torch_dtype=torch.float16,
108 | ).to(device)
109 |
110 | self.pipe = StableDiffusionXLPipeline.from_pretrained(
111 | model_id,
112 | safety_checker=None,
113 | torch_dtype=torch_dtype,
114 | vae=vae,
115 | image_encoder=image_encoder,
116 | variant="fp16",
117 | )
118 | self.pipe.load_ip_adapter(
119 | ip_adapter_model,
120 | subfolder="",
121 | weight_name=[file_name],
122 | image_encoder_folder=None,
123 | )
124 |
125 | self.pipe.load_lora_weights(
126 | hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-1step-lora.safetensors")
127 | )
128 | self.pipe.fuse_lora()
129 |
130 | self.pipe.scheduler = TCDScheduler.from_config(self.pipe.scheduler.config)
131 | self.pipe.set_ip_adapter_scale([0.8])
132 |
133 | if args.sfast:
134 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
135 | compile,
136 | CompilationConfig,
137 | )
138 |
139 | config = CompilationConfig.Default()
140 | # config.enable_xformers = True
141 | config.enable_triton = True
142 | config.enable_cuda_graph = True
143 | self.pipe = compile(self.pipe, config=config)
144 |
145 | self.pipe.set_progress_bar_config(disable=True)
146 | self.pipe.to(device=device)
147 | if device.type != "mps":
148 | self.pipe.unet.to(memory_format=torch.channels_last)
149 |
150 | if args.compel:
151 | self.pipe.compel_proc = Compel(
152 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
153 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
154 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
155 | requires_pooled=[False, True],
156 | )
157 |
158 | if args.torch_compile:
159 | self.pipe.unet = torch.compile(
160 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
161 | )
162 | self.pipe.vae = torch.compile(
163 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
164 | )
165 | self.pipe(
166 | prompt="warmup",
167 | image=[Image.new("RGB", (768, 768))],
168 | )
169 |
170 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
171 | generator = torch.manual_seed(params.seed)
172 | self.pipe.set_ip_adapter_scale([params.ip_adapter_scale])
173 |
174 | prompt = params.prompt
175 | negative_prompt = params.negative_prompt
176 | prompt_embeds = None
177 | pooled_prompt_embeds = None
178 | negative_prompt_embeds = None
179 | negative_pooled_prompt_embeds = None
180 | if hasattr(self.pipe, "compel_proc"):
181 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
182 | [params.prompt, params.negative_prompt]
183 | )
184 | prompt = None
185 | negative_prompt = None
186 |             prompt_embeds = _prompt_embeds[0:1]
187 |             negative_prompt_embeds = _prompt_embeds[1:2]
188 |             negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
189 |             pooled_prompt_embeds = pooled_prompt_embeds[0:1]  # narrow to the positive row last
190 |
191 | steps = params.steps
192 |
193 | results = self.pipe(
194 | prompt=prompt,
195 | negative_prompt=negative_prompt,
196 | prompt_embeds=prompt_embeds,
197 | pooled_prompt_embeds=pooled_prompt_embeds,
198 | negative_prompt_embeds=negative_prompt_embeds,
199 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
200 | generator=generator,
201 | num_inference_steps=steps,
202 | guidance_scale=params.guidance_scale,
203 | width=params.width,
204 | eta=params.eta,
205 | height=params.height,
206 | ip_adapter_image=[params.image],
207 | output_type="pil",
208 | )
209 | return results.images[0]
210 |
--------------------------------------------------------------------------------
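This and the later SDXL pipelines encode the positive and negative prompt in a single Compel call and then slice the stacked tensors apart. A minimal sketch of that pattern, assuming a `compel_proc` built with `requires_pooled=[False, True]` as in those files (the prompts here are placeholders):

    # conditioning has one row per prompt; pooled holds the pooled projection per prompt
    conditioning, pooled = compel_proc(["a portrait photo", "blurry, low quality"])

    prompt_embeds = conditioning[0:1]
    negative_prompt_embeds = conditioning[1:2]
    # take the negative row before narrowing the pooled tensor to the positive row,
    # otherwise the second slice comes from a one-row tensor and is empty
    negative_pooled_prompt_embeds = pooled[1:2]
    pooled_prompt_embeds = pooled[0:1]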
/server/pipelines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/server/pipelines/__init__.py
--------------------------------------------------------------------------------
/server/pipelines/controlnetFlashSD.py:
--------------------------------------------------------------------------------
1 | from diffusers import (
2 | StableDiffusionControlNetImg2ImgPipeline,
3 | ControlNetModel,
4 | AutoencoderTiny,
5 | LCMScheduler,
6 | )
7 | from compel import Compel, ReturnedEmbeddingsType
8 | import torch
9 | from pipelines.utils.canny_gpu import SobelOperator
10 |
11 | try:
12 | import intel_extension_for_pytorch as ipex # type: ignore
13 | except:
14 | pass
15 |
16 | from config import Args
17 | from pydantic import BaseModel, Field
18 | from util import ParamsModel
19 | from PIL import Image
20 | import math
21 |
22 | controlnet_model = "lllyasviel/control_v11p_sd15_canny"
23 | model_id = "runwayml/stable-diffusion-v1-5"
24 | taesd_model = "madebyollin/taesd"
25 |
26 | default_prompt = "Portrait of The Terminator, glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
27 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
28 | page_content = """
29 | Flash-SD
30 | Image-to-Image ControlNet
31 |
32 | """
33 |
34 |
35 | class Pipeline:
36 | class Info(BaseModel):
37 | name: str = "controlnet+SDXL+Turbo"
38 |         title: str = "Flash-SD + Controlnet"
39 | description: str = "Generates an image from a text prompt"
40 | input_mode: str = "image"
41 | page_content: str = page_content
42 |
43 | class InputParams(ParamsModel):
44 | prompt: str = Field(
45 | default_prompt,
46 | title="Prompt",
47 | field="textarea",
48 | id="prompt",
49 | )
50 | negative_prompt: str = Field(
51 | default_negative_prompt,
52 | title="Negative Prompt",
53 | field="textarea",
54 | id="negative_prompt",
55 | hide=True,
56 | )
57 | seed: int = Field(
58 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
59 | )
60 | steps: int = Field(
61 | 2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
62 | )
63 | width: int = Field(
64 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
65 | )
66 | height: int = Field(
67 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
68 | )
69 | strength: float = Field(
70 | 0.5,
71 | min=0.25,
72 | max=1.0,
73 | step=0.001,
74 | title="Strength",
75 | field="range",
76 | hide=True,
77 | id="strength",
78 | )
79 | controlnet_scale: float = Field(
80 | 0.5,
81 | min=0,
82 | max=1.0,
83 | step=0.001,
84 | title="Controlnet Scale",
85 | field="range",
86 | hide=True,
87 | id="controlnet_scale",
88 | )
89 | controlnet_start: float = Field(
90 | 0.0,
91 | min=0,
92 | max=1.0,
93 | step=0.001,
94 | title="Controlnet Start",
95 | field="range",
96 | hide=True,
97 | id="controlnet_start",
98 | )
99 | controlnet_end: float = Field(
100 | 1.0,
101 | min=0,
102 | max=1.0,
103 | step=0.001,
104 | title="Controlnet End",
105 | field="range",
106 | hide=True,
107 | id="controlnet_end",
108 | )
109 | canny_low_threshold: float = Field(
110 | 0.31,
111 | min=0,
112 | max=1.0,
113 | step=0.001,
114 | title="Canny Low Threshold",
115 | field="range",
116 | hide=True,
117 | id="canny_low_threshold",
118 | )
119 | canny_high_threshold: float = Field(
120 | 0.125,
121 | min=0,
122 | max=1.0,
123 | step=0.001,
124 | title="Canny High Threshold",
125 | field="range",
126 | hide=True,
127 | id="canny_high_threshold",
128 | )
129 | debug_canny: bool = Field(
130 | False,
131 | title="Debug Canny",
132 | field="checkbox",
133 | hide=True,
134 | id="debug_canny",
135 | )
136 |
137 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
138 | controlnet_canny = ControlNetModel.from_pretrained(
139 | controlnet_model, torch_dtype=torch_dtype
140 | )
141 |
142 | self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
143 | model_id,
144 | safety_checker=None,
145 | controlnet=controlnet_canny,
146 | torch_dtype=torch_dtype,
147 | )
148 |
149 | self.pipe.scheduler = LCMScheduler.from_pretrained(
150 | model_id,
151 | subfolder="scheduler",
152 | timestep_spacing="trailing",
153 | )
154 |
155 | if args.taesd:
156 | self.pipe.vae = AutoencoderTiny.from_pretrained(
157 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
158 | )
159 | self.pipe.load_lora_weights("jasperai/flash-sd")
160 | self.pipe.fuse_lora()
161 |
162 | self.canny_torch = SobelOperator(device=device)
163 |
164 | if args.sfast:
165 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
166 | compile,
167 | CompilationConfig,
168 | )
169 |
170 | config = CompilationConfig.Default()
171 | # config.enable_xformers = True
172 | config.enable_triton = True
173 | config.enable_cuda_graph = True
174 | self.pipe = compile(self.pipe, config=config)
175 |
176 | self.pipe.set_progress_bar_config(disable=True)
177 | self.pipe.to(device=device)
178 | if device.type != "mps":
179 | self.pipe.unet.to(memory_format=torch.channels_last)
180 |
181 |         if args.compel:
182 |             # SD 1.5 has a single tokenizer/text encoder, so use the plain Compel setup
183 |             self.pipe.compel_proc = Compel(
184 |                 tokenizer=self.pipe.tokenizer,
185 |                 text_encoder=self.pipe.text_encoder,
186 |                 truncate_long_prompts=False,
187 |             )
188 |
189 | if args.torch_compile:
190 | self.pipe.unet = torch.compile(
191 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
192 | )
193 | self.pipe.vae = torch.compile(
194 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
195 | )
196 | self.pipe(
197 | prompt="warmup",
198 | image=[Image.new("RGB", (768, 768))],
199 | control_image=[Image.new("RGB", (768, 768))],
200 | )
201 |
202 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
203 | generator = torch.manual_seed(params.seed)
204 |
205 | prompt = params.prompt
206 | negative_prompt = params.negative_prompt
207 | prompt_embeds = None
208 | pooled_prompt_embeds = None
209 | negative_prompt_embeds = None
210 | negative_pooled_prompt_embeds = None
211 |         if hasattr(self.pipe, "compel_proc"):
212 |             # SD 1.5 has a single text encoder, so there are no pooled embeddings to slice
213 |             prompt_embeds = self.pipe.compel_proc(params.prompt)
214 |             negative_prompt_embeds = self.pipe.compel_proc(params.negative_prompt)
215 |             (prompt_embeds, negative_prompt_embeds) = (
216 |                 self.pipe.compel_proc.pad_conditioning_tensors_to_same_length(
217 |                     [prompt_embeds, negative_prompt_embeds]
218 |                 )
219 |             )
220 |             prompt, negative_prompt = None, None
221 |
222 | control_image = self.canny_torch(
223 | params.image, params.canny_low_threshold, params.canny_high_threshold
224 | )
225 | steps = params.steps
226 | strength = params.strength
227 | if int(steps * strength) < 1:
228 | steps = math.ceil(1 / max(0.10, strength))
229 |
230 | results = self.pipe(
231 | image=params.image,
232 | control_image=control_image,
233 | prompt=prompt,
234 | negative_prompt=negative_prompt,
235 | prompt_embeds=prompt_embeds,
236 | pooled_prompt_embeds=pooled_prompt_embeds,
237 | negative_prompt_embeds=negative_prompt_embeds,
238 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
239 | generator=generator,
240 | strength=strength,
241 | num_inference_steps=steps,
242 | guidance_scale=0,
243 | width=params.width,
244 | height=params.height,
245 | output_type="pil",
246 | controlnet_conditioning_scale=params.controlnet_scale,
247 | control_guidance_start=params.controlnet_start,
248 | control_guidance_end=params.controlnet_end,
249 | )
250 |
251 | result_image = results.images[0]
252 | if params.debug_canny:
253 | # paste control_image on top of result_image
254 | w0, h0 = (200, 200)
255 | control_image = control_image.resize((w0, h0))
256 | w1, h1 = result_image.size
257 | result_image.paste(control_image, (w1 - w0, h1 - h0))
258 |
259 | return result_image
260 |
--------------------------------------------------------------------------------
/server/pipelines/controlnetLoraSD15QRCode.py:
--------------------------------------------------------------------------------
1 | from diffusers import (
2 | StableDiffusionControlNetImg2ImgPipeline,
3 | ControlNetModel,
4 | LCMScheduler,
5 | AutoencoderTiny,
6 | )
7 | from compel import Compel
8 | import torch
9 |
10 | try:
11 | import intel_extension_for_pytorch as ipex # type: ignore
12 | except:
13 | pass
14 |
15 | import psutil
16 | from config import Args
17 | from pydantic import BaseModel, Field
18 | from util import ParamsModel
19 | from PIL import Image
20 | import math
21 |
22 | taesd_model = "madebyollin/taesd"
23 | controlnet_model = "monster-labs/control_v1p_sd15_qrcode_monster"
24 | base_model = "nitrosocke/mo-di-diffusion"
25 | lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
26 | default_prompt = "abstract art of a man with curly hair by Pablo Picasso"
27 | page_content = """
28 | Real-Time Latent Consistency Model SDv1.5
29 | LCM + LoRA + Controlnet + QRCode
30 |
31 | This demo showcases
32 | LCM LoRA
36 | + ControlNet + Image to Image pipeline using
37 | Diffusers with a MJPEG stream server.
42 |
43 |
44 | Change the prompt to generate different images, accepts Compel syntax.
49 |
50 | """
51 |
52 |
53 | class Pipeline:
54 | class Info(BaseModel):
55 | name: str = "controlnet+loras+sd15"
56 | title: str = "LCM + LoRA + Controlnet"
57 | description: str = "Generates an image from a text prompt"
58 | input_mode: str = "image"
59 | page_content: str = page_content
60 |
61 | class InputParams(ParamsModel):
62 | prompt: str = Field(
63 | default_prompt,
64 | title="Prompt",
65 | field="textarea",
66 | id="prompt",
67 | )
68 | seed: int = Field(
69 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
70 | )
71 | steps: int = Field(
72 | 5, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
73 | )
74 | width: int = Field(
75 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
76 | )
77 | height: int = Field(
78 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
79 | )
80 | guidance_scale: float = Field(
81 | 1.0,
82 | min=0,
83 | max=2,
84 | step=0.001,
85 | title="Guidance Scale",
86 | field="range",
87 | hide=True,
88 | id="guidance_scale",
89 | )
90 | strength: float = Field(
91 | 0.6,
92 | min=0.25,
93 | max=1.0,
94 | step=0.001,
95 | title="Strength",
96 | field="range",
97 | hide=True,
98 | id="strength",
99 | )
100 | controlnet_scale: float = Field(
101 | 1.0,
102 | min=0,
103 | max=1.0,
104 | step=0.001,
105 | title="Controlnet Scale",
106 | field="range",
107 | hide=True,
108 | id="controlnet_scale",
109 | )
110 | controlnet_start: float = Field(
111 | 0.0,
112 | min=0,
113 | max=1.0,
114 | step=0.001,
115 | title="Controlnet Start",
116 | field="range",
117 | hide=True,
118 | id="controlnet_start",
119 | )
120 | controlnet_end: float = Field(
121 | 1.0,
122 | min=0,
123 | max=1.0,
124 | step=0.001,
125 | title="Controlnet End",
126 | field="range",
127 | hide=True,
128 | id="controlnet_end",
129 | )
130 | blend: float = Field(
131 | 0.1,
132 | min=0.0,
133 | max=1.0,
134 | step=0.001,
135 | title="Blend",
136 | field="range",
137 | hide=True,
138 | id="blend",
139 | )
140 |
141 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
142 | controlnet_qrcode = ControlNetModel.from_pretrained(
143 | controlnet_model, torch_dtype=torch_dtype, subfolder="v2"
144 | ).to(device)
145 |
146 | if args.safety_checker:
147 | self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
148 | base_model,
149 | controlnet=controlnet_qrcode,
150 | )
151 | else:
152 | self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
153 | base_model,
154 | safety_checker=None,
155 | controlnet=controlnet_qrcode,
156 | )
157 |
158 | self.control_image = Image.open("qr-code.png").convert("RGB").resize((512, 512))
159 |
160 | self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
161 | self.pipe.set_progress_bar_config(disable=True)
162 | if device.type != "mps":
163 | self.pipe.unet.to(memory_format=torch.channels_last)
164 |
165 | if args.taesd:
166 | self.pipe.vae = AutoencoderTiny.from_pretrained(
167 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
168 | ).to(device)
169 |
170 | # Load LCM LoRA
171 | self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
172 |         self.pipe.to(device=device, dtype=torch_dtype)
173 | if args.compel:
174 | self.compel_proc = Compel(
175 | tokenizer=self.pipe.tokenizer,
176 | text_encoder=self.pipe.text_encoder,
177 | truncate_long_prompts=False,
178 | )
179 | if args.torch_compile:
180 | self.pipe.unet = torch.compile(
181 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
182 | )
183 | self.pipe.vae = torch.compile(
184 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
185 | )
186 | self.pipe(
187 | prompt="warmup",
188 | image=[Image.new("RGB", (512, 512))],
189 | control_image=[Image.new("RGB", (512, 512))],
190 | )
191 |
192 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
193 | generator = torch.manual_seed(params.seed)
194 |
195 |         # mo-di-diffusion is triggered by the "modern disney style" phrase
196 |         prompt = f"modern disney style {params.prompt}"
197 |         prompt_embeds = None
198 | if hasattr(self, "compel_proc"):
199 | prompt_embeds = self.compel_proc(prompt)
200 | prompt = None
201 |
202 | steps = params.steps
203 | strength = params.strength
204 | if int(steps * strength) < 1:
205 | steps = math.ceil(1 / max(0.10, strength))
206 |
207 | blend_qr_image = Image.blend(
208 | params.image, self.control_image, alpha=params.blend
209 | )
210 | results = self.pipe(
211 | image=blend_qr_image,
212 | control_image=self.control_image,
213 | prompt=prompt,
214 | prompt_embeds=prompt_embeds,
215 | generator=generator,
216 | strength=strength,
217 | num_inference_steps=steps,
218 | guidance_scale=params.guidance_scale,
219 | width=params.width,
220 | height=params.height,
221 | output_type="pil",
222 | controlnet_conditioning_scale=params.controlnet_scale,
223 | control_guidance_start=params.controlnet_start,
224 | control_guidance_end=params.controlnet_end,
225 | )
226 |
227 | return results.images[0]
228 |
--------------------------------------------------------------------------------
/server/pipelines/controlnetPCMSD15.py:
--------------------------------------------------------------------------------
1 | from diffusers import (
2 | StableDiffusionControlNetImg2ImgPipeline,
3 | ControlNetModel,
4 | TCDScheduler,
5 | AutoencoderTiny,
6 | )
7 | from compel import Compel
8 | import torch
9 | from pipelines.utils.canny_gpu import SobelOperator
10 |
11 | try:
12 | import intel_extension_for_pytorch as ipex # type: ignore
13 | except:
14 | pass
15 |
16 | from config import Args
17 | from pydantic import BaseModel, Field
18 | from util import ParamsModel
19 | from PIL import Image
20 |
21 | taesd_model = "madebyollin/taesd"
22 | controlnet_model = "lllyasviel/control_v11p_sd15_canny"
23 | base_model_id = "runwayml/stable-diffusion-v1-5"
24 | pcm_base = "wangfuyun/PCM_Weights"
25 | pcm_lora_ckpts = {
26 | "2-Step": ["pcm_sd15_smallcfg_2step_converted.safetensors", 2, 0.0],
27 | "4-Step": ["pcm_sd15_smallcfg_4step_converted.safetensors", 4, 0.0],
28 | "8-Step": ["pcm_sd15_smallcfg_8step_converted.safetensors", 8, 0.0],
29 | "16-Step": ["pcm_sd15_smallcfg_16step_converted.safetensors", 16, 0.0],
30 | "Normal CFG 4-Step": ["pcm_sd15_normalcfg_4step_converted.safetensors", 4, 7.5],
31 | "Normal CFG 8-Step": ["pcm_sd15_normalcfg_8step_converted.safetensors", 8, 7.5],
32 | "Normal CFG 16-Step": ["pcm_sd15_normalcfg_16step_converted.safetensors", 16, 7.5],
33 | }
34 | default_prompt = "Portrait of The Terminator, glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
35 | page_content = """
36 |
37 | """
38 |
39 |
40 | class Pipeline:
41 | class Info(BaseModel):
42 | name: str = "controlnet+loras+sd15"
43 |         title: str = "PCM + LoRA + Controlnet"
44 | description: str = "Generates an image from a text prompt"
45 | input_mode: str = "image"
46 | page_content: str = page_content
47 |
48 | class InputParams(ParamsModel):
49 | prompt: str = Field(
50 | default_prompt,
51 | title="Prompt",
52 | field="textarea",
53 | id="prompt",
54 | )
55 | lora_ckpt_id: str = Field(
56 | "4-Step",
57 | title="PCM Base Model",
58 | values=list(pcm_lora_ckpts.keys()),
59 | field="select",
60 | id="lora_ckpt_id",
61 | )
62 | seed: int = Field(
63 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
64 | )
65 | width: int = Field(
66 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
67 | )
68 | height: int = Field(
69 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
70 | )
71 | strength: float = Field(
72 | 0.5,
73 | min=0.25,
74 | max=1.0,
75 | step=0.001,
76 | title="Strength",
77 | field="range",
78 | hide=True,
79 | id="strength",
80 | )
81 | controlnet_scale: float = Field(
82 | 0.8,
83 | min=0,
84 | max=1.0,
85 | step=0.001,
86 | title="Controlnet Scale",
87 | field="range",
88 | hide=True,
89 | id="controlnet_scale",
90 | )
91 | controlnet_start: float = Field(
92 | 0.0,
93 | min=0,
94 | max=1.0,
95 | step=0.001,
96 | title="Controlnet Start",
97 | field="range",
98 | hide=True,
99 | id="controlnet_start",
100 | )
101 | controlnet_end: float = Field(
102 | 1.0,
103 | min=0,
104 | max=1.0,
105 | step=0.001,
106 | title="Controlnet End",
107 | field="range",
108 | hide=True,
109 | id="controlnet_end",
110 | )
111 | canny_low_threshold: float = Field(
112 | 0.31,
113 | min=0,
114 | max=1.0,
115 | step=0.001,
116 | title="Canny Low Threshold",
117 | field="range",
118 | hide=True,
119 | id="canny_low_threshold",
120 | )
121 | canny_high_threshold: float = Field(
122 | 0.125,
123 | min=0,
124 | max=1.0,
125 | step=0.001,
126 | title="Canny High Threshold",
127 | field="range",
128 | hide=True,
129 | id="canny_high_threshold",
130 | )
131 | debug_canny: bool = Field(
132 | False,
133 | title="Debug Canny",
134 | field="checkbox",
135 | hide=True,
136 | id="debug_canny",
137 | )
138 |
139 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
140 | controlnet_canny = ControlNetModel.from_pretrained(
141 | controlnet_model, torch_dtype=torch_dtype
142 | ).to(device)
143 |
144 | self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
145 | base_model_id,
146 | safety_checker=None,
147 | controlnet=controlnet_canny,
148 | )
149 |
150 | self.canny_torch = SobelOperator(device=device)
151 |
152 | self.pipe.scheduler = TCDScheduler(
153 | num_train_timesteps=1000,
154 | beta_start=0.00085,
155 | beta_end=0.012,
156 | beta_schedule="scaled_linear",
157 | timestep_spacing="trailing",
158 | )
159 |
160 | self.pipe.set_progress_bar_config(disable=True)
161 | if device.type != "mps":
162 | self.pipe.unet.to(memory_format=torch.channels_last)
163 |
164 | if args.taesd:
165 | self.pipe.vae = AutoencoderTiny.from_pretrained(
166 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
167 | ).to(device)
168 |
169 | self.loaded_lora = "4-Step"
170 | self.pipe.load_lora_weights(
171 | pcm_base,
172 | weight_name=pcm_lora_ckpts[self.loaded_lora][0],
173 | subfolder="sd15",
174 | )
175 |         self.pipe.to(device=device, dtype=torch_dtype)
176 | if args.compel:
177 | self.compel_proc = Compel(
178 | tokenizer=self.pipe.tokenizer,
179 | text_encoder=self.pipe.text_encoder,
180 | truncate_long_prompts=False,
181 | )
182 | if args.torch_compile:
183 | self.pipe.unet = torch.compile(
184 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
185 | )
186 | self.pipe.vae = torch.compile(
187 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
188 | )
189 | self.pipe(
190 | prompt="warmup",
191 | image=[Image.new("RGB", (768, 768))],
192 | control_image=[Image.new("RGB", (768, 768))],
193 | )
194 |
195 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
196 | generator = torch.manual_seed(params.seed)
197 | guidance_scale = pcm_lora_ckpts[params.lora_ckpt_id][2]
198 | steps = pcm_lora_ckpts[params.lora_ckpt_id][1]
199 |
200 | if self.loaded_lora != params.lora_ckpt_id:
201 | checkpoint = pcm_lora_ckpts[params.lora_ckpt_id][0]
202 | self.pipe.load_lora_weights(
203 | pcm_base,
204 | weight_name=checkpoint,
205 | subfolder="sd15",
206 | )
207 | self.loaded_lora = params.lora_ckpt_id
208 |
209 | prompt_embeds = None
210 | prompt = params.prompt
211 | if hasattr(self, "compel_proc"):
212 | prompt_embeds = self.compel_proc(prompt)
213 | prompt = None
214 | control_image = self.canny_torch(
215 | params.image, params.canny_low_threshold, params.canny_high_threshold
216 | )
217 | strength = params.strength
218 |
219 | results = self.pipe(
220 | image=params.image,
221 | control_image=control_image,
222 | prompt=prompt,
223 | prompt_embeds=prompt_embeds,
224 | generator=generator,
225 | strength=strength,
226 | num_inference_steps=steps,
227 | guidance_scale=guidance_scale,
228 | width=params.width,
229 | height=params.height,
230 | output_type="pil",
231 | controlnet_conditioning_scale=params.controlnet_scale,
232 | control_guidance_start=params.controlnet_start,
233 | control_guidance_end=params.controlnet_end,
234 | )
235 |
236 | result_image = results.images[0]
237 | if params.debug_canny:
238 | # paste control_image on top of result_image
239 | w0, h0 = (200, 200)
240 | control_image = control_image.resize((w0, h0))
241 | w1, h1 = result_image.size
242 | result_image.paste(control_image, (w1 - w0, h1 - h0))
243 |
244 | return result_image
245 |
--------------------------------------------------------------------------------
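controlnetPCMSD15.py keys the step count and guidance scale off the selected Phased Consistency Model checkpoint and reloads the LoRA only when the selection changes. A short sketch of that lookup, with `pipe` and `loaded_lora` standing in for the instance attributes and an example preset:

    # each preset maps to [weight file, distilled step count, guidance scale]
    weight_name, steps, guidance_scale = pcm_lora_ckpts["Normal CFG 4-Step"]

    if loaded_lora != "Normal CFG 4-Step":
        # loading new LoRA weights replaces the previously loaded PCM adapter
        pipe.load_lora_weights(pcm_base, weight_name=weight_name, subfolder="sd15")
        loaded_lora = "Normal CFG 4-Step"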
/server/pipelines/img2img.py:
--------------------------------------------------------------------------------
1 | from diffusers import (
2 | AutoPipelineForImage2Image,
3 | AutoencoderTiny,
4 | )
5 | from compel import Compel
6 | import torch
7 |
8 | try:
9 | import intel_extension_for_pytorch as ipex # type: ignore
10 | except:
11 | pass
12 |
13 | import psutil
14 | from config import Args
15 | from pydantic import BaseModel, Field
16 | from PIL import Image
17 | from util import ParamsModel
18 | import math
19 |
20 | base_model = "SimianLuo/LCM_Dreamshaper_v7"
21 | taesd_model = "madebyollin/taesd"
22 |
23 | default_prompt = "Portrait of The Terminator, glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
24 | page_content = """
25 | Real-Time Latent Consistency Model
26 | Image-to-Image LCM
27 |
28 | This demo showcases
29 | LCM
33 | Image to Image pipeline using
34 | Diffusers with a MJPEG stream server.
39 |
40 |
41 | Change the prompt to generate different images, accepts Compel syntax.
46 |
47 | """
48 |
49 |
50 | class Pipeline:
51 | class Info(BaseModel):
52 | name: str = "img2img"
53 | title: str = "Image-to-Image LCM"
54 | description: str = "Generates an image from a text prompt"
55 | input_mode: str = "image"
56 | page_content: str = page_content
57 |
58 | class InputParams(ParamsModel):
59 | prompt: str = Field(
60 | default_prompt,
61 | title="Prompt",
62 | field="textarea",
63 | id="prompt",
64 | )
65 | seed: int = Field(
66 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
67 | )
68 | steps: int = Field(
69 | 4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
70 | )
71 | width: int = Field(
72 | 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
73 | )
74 | height: int = Field(
75 | 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
76 | )
77 | guidance_scale: float = Field(
78 | 0.2,
79 | min=0,
80 | max=20,
81 | step=0.001,
82 | title="Guidance Scale",
83 | field="range",
84 | hide=True,
85 | id="guidance_scale",
86 | )
87 | strength: float = Field(
88 | 0.5,
89 | min=0.25,
90 | max=1.0,
91 | step=0.001,
92 | title="Strength",
93 | field="range",
94 | hide=True,
95 | id="strength",
96 | )
97 |
98 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
99 | self.pipe = AutoPipelineForImage2Image.from_pretrained(
100 | base_model,
101 | safety_checker=None,
102 | )
103 | if args.taesd:
104 | self.pipe.vae = AutoencoderTiny.from_pretrained(
105 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
106 | ).to(device)
107 |
108 | if args.sfast:
109 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
110 | compile,
111 | CompilationConfig,
112 | )
113 |
114 | config = CompilationConfig.Default()
115 | config.enable_xformers = True
116 | config.enable_triton = True
117 | config.enable_cuda_graph = True
118 | self.pipe = compile(self.pipe, config=config)
119 |
120 | self.pipe.set_progress_bar_config(disable=True)
121 | self.pipe.to(device=device, dtype=torch_dtype)
122 | if device.type != "mps":
123 | self.pipe.unet.to(memory_format=torch.channels_last)
124 |
125 | if args.torch_compile:
126 | print("Running torch compile")
127 | self.pipe.unet = torch.compile(
128 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
129 | )
130 | self.pipe.vae = torch.compile(
131 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
132 | )
133 |
134 | self.pipe(
135 | prompt="warmup",
136 | image=[Image.new("RGB", (768, 768))],
137 | )
138 |
139 | if args.compel:
140 | self.compel_proc = Compel(
141 | tokenizer=self.pipe.tokenizer,
142 | text_encoder=self.pipe.text_encoder,
143 | truncate_long_prompts=False,
144 | )
145 |
146 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
147 | generator = torch.manual_seed(params.seed)
148 | prompt_embeds = None
149 | prompt = params.prompt
150 | if hasattr(self, "compel_proc"):
151 | prompt_embeds = self.compel_proc(params.prompt)
152 | prompt = None
153 |
154 | steps = params.steps
155 | strength = params.strength
156 | if int(steps * strength) < 1:
157 | steps = math.ceil(1 / max(0.10, strength))
158 |
159 | results = self.pipe(
160 | image=params.image,
161 | prompt=prompt,
162 | prompt_embeds=prompt_embeds,
163 | generator=generator,
164 | strength=strength,
165 | num_inference_steps=steps,
166 | guidance_scale=params.guidance_scale,
167 | width=params.width,
168 | height=params.height,
169 | output_type="pil",
170 | )
171 |
172 | return results.images[0]
173 |
--------------------------------------------------------------------------------
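The img2img pipelines above scale the effective number of denoising steps by `strength`: diffusers runs roughly int(steps * strength) steps, so when that product rounds to zero the code bumps steps up to guarantee at least one. A small self-contained illustration of the same guard (the helper name is only for this sketch):

    import math

    def effective_steps(steps: int, strength: float) -> int:
        # diffusers' img2img schedules about int(steps * strength) denoising steps,
        # so make sure at least one step survives the rounding
        if int(steps * strength) < 1:
            steps = math.ceil(1 / max(0.10, strength))
        return steps

    # e.g. steps=4, strength=0.2: int(0.8) == 0, so steps becomes ceil(1 / 0.2) == 5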
/server/pipelines/img2imgFlux.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from optimum.quanto import freeze, qfloat8, quantize
4 | from transformers.modeling_utils import PreTrainedModel
5 | from diffusers import AutoencoderTiny
6 | from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
7 | from diffusers.pipelines.flux.pipeline_flux_img2img import FluxImg2ImgPipeline
8 | from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
9 | from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
10 |
11 |
12 | from pruna import smash, SmashConfig
13 | from pruna.telemetry import set_telemetry_metrics
14 |
15 | set_telemetry_metrics(False) # disable telemetry for current session
16 | set_telemetry_metrics(False, set_as_default=True) # disable telemetry globally
17 |
18 |
19 | try:
20 | import intel_extension_for_pytorch as ipex # type: ignore
21 | except:
22 | pass
23 |
24 | import psutil
25 | from config import Args
26 | from pydantic import BaseModel, Field
27 | from PIL import Image
28 | from pathlib import Path
29 | from util import ParamsModel
30 | import math
31 | import gc
32 |
33 |
34 | # model_path = "black-forest-labs/FLUX.1-dev"
35 | model_path = "black-forest-labs/FLUX.1-schnell"
36 | base_model_path = "black-forest-labs/FLUX.1-schnell"
37 | taesd_path = "madebyollin/taef1"
38 | subfolder = "transformer"
39 | transformer_path = model_path
40 | models_path = Path("models")
41 |
42 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
43 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
44 | page_content = """
45 | Real-Time FLUX
46 |
47 | """
48 |
49 |
50 | def flush():
51 | torch.cuda.empty_cache()
52 | gc.collect()
53 |
54 |
55 | class Pipeline:
56 | class Info(BaseModel):
57 | name: str = "img2img"
58 |         title: str = "Image-to-Image FLUX"
59 | description: str = "Generates an image from a text prompt"
60 | input_mode: str = "image"
61 | page_content: str = page_content
62 |
63 | class InputParams(ParamsModel):
64 | prompt: str = Field(
65 | default_prompt,
66 | title="Prompt",
67 | field="textarea",
68 | id="prompt",
69 | )
70 | seed: int = Field(
71 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
72 | )
73 | steps: int = Field(
74 | 1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
75 | )
76 | width: int = Field(
77 | 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
78 | )
79 | height: int = Field(
80 | 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
81 | )
82 | strength: float = Field(
83 | 0.5,
84 | min=0.25,
85 | max=1.0,
86 | step=0.001,
87 | title="Strength",
88 | field="range",
89 | hide=True,
90 | id="strength",
91 | )
92 | guidance: float = Field(
93 | 3.5,
94 | min=0,
95 | max=20,
96 | step=0.001,
97 | title="Guidance",
98 | hide=True,
99 | field="range",
100 | id="guidance",
101 | )
102 |
103 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
104 | # ckpt_path = (
105 | # "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf"
106 | # )
107 | print("Loading model")
108 |
109 | model_id = "black-forest-labs/FLUX.1-schnell"
110 | model_revision = "refs/pr/1"
111 | text_model_id = "openai/clip-vit-large-patch14"
112 | model_data_type = torch.bfloat16
113 | tokenizer = CLIPTokenizer.from_pretrained(
114 | text_model_id, torch_dtype=model_data_type
115 | )
116 | text_encoder = CLIPTextModel.from_pretrained(
117 | text_model_id, torch_dtype=model_data_type
118 | )
119 |
120 | # 2
121 | tokenizer_2 = T5TokenizerFast.from_pretrained(
122 | model_id,
123 | subfolder="tokenizer_2",
124 | torch_dtype=model_data_type,
125 | revision=model_revision,
126 | )
127 | text_encoder_2 = T5EncoderModel.from_pretrained(
128 | model_id,
129 | subfolder="text_encoder_2",
130 | torch_dtype=model_data_type,
131 | revision=model_revision,
132 | )
133 |
134 | # Transformers
135 | scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
136 | model_id, subfolder="scheduler", revision=model_revision
137 | )
138 | transformer = FluxTransformer2DModel.from_pretrained(
139 | model_id,
140 | subfolder="transformer",
141 | torch_dtype=model_data_type,
142 | revision=model_revision,
143 | )
144 |
145 | # VAE
146 | # vae = AutoencoderKL.from_pretrained(
147 | # model_id,
148 | # subfolder="vae",
149 | # torch_dtype=model_data_type,
150 | # revision=model_revision,
151 | # )
152 |
153 | vae = AutoencoderTiny.from_pretrained(
154 | "madebyollin/taef1", torch_dtype=torch.bfloat16
155 | )
156 |
157 | # Initialize the SmashConfig
158 | smash_config = SmashConfig()
159 | smash_config["quantizer"] = "quanto"
160 | smash_config["quanto_calibrate"] = False
161 | smash_config["quanto_weight_bits"] = "qint4"
162 | # (
163 | # "qint4" # "qfloat8" # or "qint2", "qint4", "qint8"
164 | # )
165 |
166 | transformer = smash(
167 | model=transformer,
168 | smash_config=smash_config,
169 | )
170 | text_encoder_2 = smash(
171 | model=text_encoder_2,
172 | smash_config=smash_config,
173 | )
174 |
175 | pipe = FluxImg2ImgPipeline(
176 | scheduler=scheduler,
177 | text_encoder=text_encoder,
178 | tokenizer=tokenizer,
179 | text_encoder_2=text_encoder_2,
180 | tokenizer_2=tokenizer_2,
181 | vae=vae,
182 | transformer=transformer,
183 | )
184 |
185 | # if args.taesd:
186 | # pipe.vae = AutoencoderTiny.from_pretrained(
187 | # taesd_path, torch_dtype=torch.bfloat16, use_safetensors=True
188 | # )
189 | # pipe.enable_model_cpu_offload()
190 | pipe.text_encoder.to(device)
191 | pipe.vae.to(device)
192 | pipe.transformer.to(device)
193 | pipe.text_encoder_2.to(device)
194 |
195 | # pipe.enable_model_cpu_offload()
196 | # For added memory savings run this block, there is however a trade-off with speed.
197 | # vae.enable_tiling()
198 | # vae.enable_slicing()
199 | # pipe.enable_sequential_cpu_offload()
200 |
201 | self.pipe = pipe
202 | self.pipe.set_progress_bar_config(disable=True)
203 | # vae = AutoencoderKL.from_pretrained(
204 | # base_model_path, subfolder="vae", torch_dtype=torch_dtype
205 | # )
206 |
207 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
208 | generator = torch.manual_seed(params.seed)
209 | steps = params.steps
210 | strength = params.strength
211 | prompt = params.prompt
212 | guidance = params.guidance
213 |
214 | results = self.pipe(
215 | image=params.image,
216 | prompt=prompt,
217 | generator=generator,
218 | strength=strength,
219 | num_inference_steps=steps,
220 | guidance_scale=guidance,
221 | width=params.width,
222 | height=params.height,
223 | )
224 | return results.images[0]
225 |
--------------------------------------------------------------------------------
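img2imgFlux.py imports quantize/freeze/qfloat8 from optimum.quanto but ends up quantizing the transformer and T5 encoder through Pruna's smash. For reference, a direct quanto path, as those imports suggest, would look roughly like this (a sketch, not what the file actually runs):

    from optimum.quanto import freeze, qfloat8, quantize

    # quantize the heavy weights to float8, then freeze them so they stay quantized
    quantize(transformer, weights=qfloat8)
    freeze(transformer)
    quantize(text_encoder_2, weights=qfloat8)
    freeze(text_encoder_2)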
/server/pipelines/img2imgSDTurbo.py:
--------------------------------------------------------------------------------
1 | from diffusers import (
2 | AutoPipelineForImage2Image,
3 | AutoencoderTiny,
4 | )
5 | import torch
6 |
7 |
8 | from config import Args
9 | from pydantic import BaseModel, Field
10 | from PIL import Image
11 | from util import ParamsModel
12 | import math
13 |
14 | from pruna import smash, SmashConfig
15 |
16 | base_model = "stabilityai/sd-turbo"
17 | taesd_model = "madebyollin/taesd"
18 |
19 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
20 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
21 | page_content = """
22 | Real-Time SD-Turbo
23 | Image-to-Image
24 |
25 | This demo showcases
26 |         SD-Turbo
30 | Image to Image pipeline using
31 | Diffusers with a MJPEG stream server.
36 |
37 |
38 | Change the prompt to generate different images, accepts Compel syntax.
43 |
44 | """
45 |
46 |
47 | class Pipeline:
48 | class Info(BaseModel):
49 | name: str = "img2img"
50 |         title: str = "Image-to-Image SD-Turbo"
51 | description: str = "Generates an image from a text prompt"
52 | input_mode: str = "image"
53 | page_content: str = page_content
54 |
55 | class InputParams(ParamsModel):
56 | prompt: str = Field(
57 | default_prompt,
58 | title="Prompt",
59 | field="textarea",
60 | id="prompt",
61 | )
62 | negative_prompt: str = Field(
63 | default_negative_prompt,
64 | title="Negative Prompt",
65 | field="textarea",
66 | id="negative_prompt",
67 | hide=True,
68 | )
69 | seed: int = Field(
70 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
71 | )
72 | steps: int = Field(
73 | 1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
74 | )
75 | width: int = Field(
76 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
77 | )
78 | height: int = Field(
79 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
80 | )
81 | strength: float = Field(
82 | 0.5,
83 | min=0.25,
84 | max=1.0,
85 | step=0.001,
86 | title="Strength",
87 | field="range",
88 | hide=True,
89 | id="strength",
90 | )
91 |
92 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
93 | self.pipe = AutoPipelineForImage2Image.from_pretrained(
94 | base_model,
95 | safety_checker=None,
96 | )
97 | if args.taesd:
98 | self.pipe.vae = AutoencoderTiny.from_pretrained(
99 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
100 | ).to(device)
101 |
102 | if args.pruna:
103 | # Create and smash your model
104 | smash_config = SmashConfig()
105 | # smash_config["cacher"] = "deepcache"
106 | smash_config["compiler"] = "stable_fast"
107 | self.pipe = smash(model=self.pipe, smash_config=smash_config)
108 |
109 | self.pipe.set_progress_bar_config(disable=True)
110 | self.pipe.to(device=device, dtype=torch_dtype)
111 | # if device.type != "mps":
112 | # self.pipe.unet.to(memory_format=torch.channels_last)
113 |
114 | if args.torch_compile:
115 | print("Running torch compile")
116 | self.pipe.unet = torch.compile(
117 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
118 | )
119 | self.pipe.vae = torch.compile(
120 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
121 | )
122 |
123 | self.pipe(
124 | prompt="warmup",
125 | image=[Image.new("RGB", (768, 768))],
126 | )
127 | if args.compel:
128 | from compel import Compel
129 |
130 | self.pipe.compel_proc = Compel(
131 | tokenizer=self.pipe.tokenizer,
132 | text_encoder=self.pipe.text_encoder,
133 | truncate_long_prompts=True,
134 | )
135 |
136 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
137 | generator = torch.manual_seed(params.seed)
138 | steps = params.steps
139 | strength = params.strength
140 | if int(steps * strength) < 1:
141 | steps = math.ceil(1 / max(0.10, strength))
142 |
143 | prompt = params.prompt
144 | prompt_embeds = None
145 | if hasattr(self.pipe, "compel_proc"):
146 | prompt_embeds = self.pipe.compel_proc(
147 | [params.prompt, params.negative_prompt]
148 | )
149 | prompt = None
150 |
151 | results = self.pipe(
152 | image=params.image,
153 | prompt_embeds=prompt_embeds,
154 | prompt=prompt,
155 | negative_prompt=params.negative_prompt,
156 | generator=generator,
157 | strength=strength,
158 | num_inference_steps=steps,
159 | guidance_scale=1.1,
160 | width=params.width,
161 | height=params.height,
162 | output_type="pil",
163 | )
164 |
165 | return results.images[0]
166 |
--------------------------------------------------------------------------------
/server/pipelines/img2imgSDXL-Lightning.py:
--------------------------------------------------------------------------------
1 | from diffusers import (
2 | AutoPipelineForImage2Image,
3 | AutoencoderTiny,
4 | AutoencoderKL,
5 | UNet2DConditionModel,
6 | EulerDiscreteScheduler,
7 | )
8 | from compel import Compel, ReturnedEmbeddingsType
9 | import torch
10 |
11 | try:
12 | import intel_extension_for_pytorch as ipex # type: ignore
13 | except:
14 | pass
15 |
16 | from safetensors.torch import load_file
17 | from huggingface_hub import hf_hub_download
18 | from config import Args
19 | from pydantic import BaseModel, Field
20 | from PIL import Image
21 | from util import ParamsModel
22 | import math
23 | from pruna import SmashConfig, smash
24 |
25 | base = "stabilityai/stable-diffusion-xl-base-1.0"
26 | repo = "ByteDance/SDXL-Lightning"
27 | ckpt = "sdxl_lightning_2step_unet.safetensors"
28 | taesd_model = "madebyollin/taesdxl"
29 | NUM_STEPS = 2
30 |
31 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
32 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
33 | page_content = """
34 | Real-Time SDXL Lightning
35 | Image-to-Image
36 |
37 | This demo showcases
38 |         SDXL Lightning
42 | Image to Image pipeline using
43 | Diffusers with a MJPEG stream server.
48 |
49 |
50 | Change the prompt to generate different images, accepts Compel syntax.
55 |
56 | """
57 |
58 |
59 | class Pipeline:
60 | class Info(BaseModel):
61 | name: str = "img2img"
62 | title: str = "Image-to-Image SDXL-Lightning"
63 | description: str = "Generates an image from a text prompt"
64 | input_mode: str = "image"
65 | page_content: str = page_content
66 |
67 | class InputParams(ParamsModel):
68 | prompt: str = Field(
69 | default_prompt,
70 | title="Prompt",
71 | field="textarea",
72 | id="prompt",
73 | )
74 | negative_prompt: str = Field(
75 | default_negative_prompt,
76 | title="Negative Prompt",
77 | field="textarea",
78 | id="negative_prompt",
79 | hide=True,
80 | )
81 | seed: int = Field(
82 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
83 | )
84 | steps: int = Field(
85 | 1, min=1, max=10, title="Steps", field="range", hide=True, id="steps"
86 | )
87 | width: int = Field(
88 | 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
89 | )
90 | height: int = Field(
91 | 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
92 | )
93 | guidance_scale: float = Field(
94 | 0.0,
95 | min=0,
96 | max=1,
97 | step=0.001,
98 | title="Guidance Scale",
99 | field="range",
100 | hide=True,
101 | id="guidance_scale",
102 | )
103 | strength: float = Field(
104 | 0.5,
105 | min=0.25,
106 | max=1.0,
107 | step=0.001,
108 | title="Strength",
109 | field="range",
110 | hide=True,
111 | id="strength",
112 | )
113 |
114 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
115 | if args.taesd:
116 | vae = AutoencoderTiny.from_pretrained(
117 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
118 | )
119 | else:
120 | vae = AutoencoderKL.from_pretrained(
121 | "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
122 | )
123 |
124 | unet = UNet2DConditionModel.from_config(base, subfolder="unet")
125 | unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device.type))
126 | self.pipe = AutoPipelineForImage2Image.from_pretrained(
127 | base,
128 | unet=unet,
129 | torch_dtype=torch_dtype,
130 | variant="fp16",
131 | safety_checker=False,
132 | vae=vae,
133 | )
134 | # Ensure sampler uses "trailing" timesteps.
135 | self.pipe.scheduler = EulerDiscreteScheduler.from_config(
136 | self.pipe.scheduler.config, timestep_spacing="trailing"
137 | )
138 |
139 | if args.pruna:
140 | # Create and smash your model
141 | smash_config = SmashConfig()
142 | smash_config["cacher"] = "deepcache"
143 | smash_config["compiler"] = "stable_fast"
144 | self.pipe = smash(model=self.pipe, smash_config=smash_config)
145 |
146 | if args.sfast:
147 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
148 | compile,
149 | CompilationConfig,
150 | )
151 |
152 | config = CompilationConfig.Default()
153 | config.enable_xformers = True
154 | config.enable_triton = True
155 | config.enable_cuda_graph = True
156 | self.pipe = compile(self.pipe, config=config)
157 |
158 | self.pipe.set_progress_bar_config(disable=True)
159 | self.pipe.to(device=device, dtype=torch_dtype)
160 | if device.type != "mps":
161 | self.pipe.unet.to(memory_format=torch.channels_last)
162 |
163 | if args.torch_compile:
164 | print("Running torch compile")
165 | self.pipe.unet = torch.compile(
166 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
167 | )
168 | self.pipe.vae = torch.compile(
169 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
170 | )
171 | self.pipe(
172 | prompt="warmup",
173 | image=[Image.new("RGB", (768, 768))],
174 | )
175 |
176 | if args.compel:
177 | self.pipe.compel_proc = Compel(
178 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
179 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
180 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
181 | requires_pooled=[False, True],
182 | )
183 |
184 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
185 | generator = torch.manual_seed(params.seed)
186 | prompt = params.prompt
187 | negative_prompt = params.negative_prompt
188 | prompt_embeds = None
189 | pooled_prompt_embeds = None
190 | negative_prompt_embeds = None
191 | negative_pooled_prompt_embeds = None
192 | if hasattr(self.pipe, "compel_proc"):
193 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
194 | [params.prompt, params.negative_prompt]
195 | )
196 | prompt = None
197 | negative_prompt = None
198 |             prompt_embeds = _prompt_embeds[0:1]
199 |             negative_prompt_embeds = _prompt_embeds[1:2]
200 |             negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
201 |             pooled_prompt_embeds = pooled_prompt_embeds[0:1]  # narrow to the positive row last
202 |
203 | steps = params.steps
204 | strength = params.strength
205 | if int(steps * strength) < 1:
206 | steps = math.ceil(1 / max(0.10, strength))
207 |
208 | results = self.pipe(
209 | image=params.image,
210 | prompt=prompt,
211 | negative_prompt=negative_prompt,
212 | prompt_embeds=prompt_embeds,
213 | pooled_prompt_embeds=pooled_prompt_embeds,
214 | negative_prompt_embeds=negative_prompt_embeds,
215 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
216 | generator=generator,
217 | strength=strength,
218 | num_inference_steps=steps,
219 | guidance_scale=params.guidance_scale,
220 | width=params.width,
221 | height=params.height,
222 | output_type="pil",
223 | )
224 |
225 | return results.images[0]
226 |
--------------------------------------------------------------------------------
/server/pipelines/img2imgSDXLTurbo.py:
--------------------------------------------------------------------------------
1 | from diffusers import (
2 | AutoPipelineForImage2Image,
3 | AutoencoderTiny,
4 | )
5 | from compel import Compel, ReturnedEmbeddingsType
6 | import torch
7 |
8 | try:
9 | import intel_extension_for_pytorch as ipex # type: ignore
10 | except:
11 | pass
12 |
13 | import psutil
14 | from config import Args
15 | from pydantic import BaseModel, Field
16 | from PIL import Image
17 | from util import ParamsModel
18 | import math
19 |
20 | from pruna import smash, SmashConfig
21 | from pruna.telemetry import set_telemetry_metrics
22 |
23 | set_telemetry_metrics(False) # disable telemetry for current session
24 | set_telemetry_metrics(False, set_as_default=True) # disable telemetry globally
25 |
26 |
27 | base_model = "stabilityai/sdxl-turbo"
28 | taesd_model = "madebyollin/taesdxl"
29 |
30 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
31 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
32 | page_content = """
33 | Real-Time SDXL Turbo
34 | Image-to-Image
35 |
36 | This demo showcases
37 | SDXL Turbo
41 | Image to Image pipeline using
42 | Diffusers with a MJPEG stream server.
47 |
48 |
49 | Change the prompt to generate different images, accepts Compel syntax.
54 |
55 | """
56 |
57 |
58 | class Pipeline:
59 | class Info(BaseModel):
60 | name: str = "img2img"
61 |         title: str = "Image-to-Image SDXL Turbo"
62 | description: str = "Generates an image from a text prompt"
63 | input_mode: str = "image"
64 | page_content: str = page_content
65 |
66 | class InputParams(ParamsModel):
67 | prompt: str = Field(
68 | default_prompt,
69 | title="Prompt",
70 | field="textarea",
71 | id="prompt",
72 | )
73 | negative_prompt: str = Field(
74 | default_negative_prompt,
75 | title="Negative Prompt",
76 | field="textarea",
77 | id="negative_prompt",
78 | hide=True,
79 | )
80 | seed: int = Field(
81 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
82 | )
83 | steps: int = Field(
84 | 1, min=1, max=10, title="Steps", field="range", hide=True, id="steps"
85 | )
86 | width: int = Field(
87 | 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
88 | )
89 | height: int = Field(
90 | 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
91 | )
92 | guidance_scale: float = Field(
93 | 1.0,
94 | min=0,
95 | max=1,
96 | step=0.001,
97 | title="Guidance Scale",
98 | field="range",
99 | hide=True,
100 | id="guidance_scale",
101 | )
102 | strength: float = Field(
103 | 0.5,
104 | min=0.25,
105 | max=1.0,
106 | step=0.001,
107 | title="Strength",
108 | field="range",
109 | hide=True,
110 | id="strength",
111 | )
112 |
113 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
114 | base_pipe = AutoPipelineForImage2Image.from_pretrained(
115 | base_model,
116 | safety_checker=None,
117 | )
118 |         self.pipe = base_pipe
119 | if args.taesd:
120 | self.pipe.vae = AutoencoderTiny.from_pretrained(
121 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
122 | ).to(device)
123 |
124 | if args.sfast:
125 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
126 | compile,
127 | CompilationConfig,
128 | )
129 |
130 | config = CompilationConfig.Default()
131 | config.enable_xformers = True
132 | config.enable_triton = True
133 | config.enable_cuda_graph = True
134 | self.pipe = compile(self.pipe, config=config)
135 |
136 | if device.type != "mps":
137 | self.pipe.unet.to(memory_format=torch.channels_last)
138 |
139 | if args.pruna:
140 | # Create and smash your model
141 | smash_config = SmashConfig()
142 | smash_config["cacher"] = "deepcache"
143 | smash_config["compiler"] = "stable_fast"
144 | self.pipe = smash(model=base_pipe, smash_config=smash_config)
145 |
146 | if args.torch_compile:
147 | print("Running torch compile")
148 | self.pipe.unet = torch.compile(
149 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
150 | )
151 | self.pipe.vae = torch.compile(
152 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
153 | )
154 | self.pipe(
155 | prompt="warmup",
156 | image=[Image.new("RGB", (768, 768))],
157 | )
158 |
159 | if args.compel:
160 | self.pipe.compel_proc = Compel(
161 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
162 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
163 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
164 | requires_pooled=[False, True],
165 | )
166 |
167 | self.pipe.set_progress_bar_config(disable=True)
168 | self.pipe.to(device=device, dtype=torch_dtype)
169 |
170 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
171 | generator = torch.manual_seed(params.seed)
172 | prompt = params.prompt
173 | negative_prompt = params.negative_prompt
174 | prompt_embeds = None
175 | pooled_prompt_embeds = None
176 | negative_prompt_embeds = None
177 | negative_pooled_prompt_embeds = None
178 | if hasattr(self.pipe, "compel_proc"):
179 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
180 | [params.prompt, params.negative_prompt]
181 | )
182 | prompt = None
183 | negative_prompt = None
184 |             prompt_embeds = _prompt_embeds[0:1]
185 |             negative_prompt_embeds = _prompt_embeds[1:2]
186 |             negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
187 |             pooled_prompt_embeds = pooled_prompt_embeds[0:1]  # narrow to the positive row last
188 |
189 | steps = params.steps
190 | strength = params.strength
191 |         if int(steps * strength) < 1:  # ensure at least one effective denoising step at low strength
192 | steps = math.ceil(1 / max(0.10, strength))
193 |
194 | results = self.pipe(
195 | image=params.image,
196 | prompt=prompt,
197 | negative_prompt=negative_prompt,
198 | prompt_embeds=prompt_embeds,
199 | pooled_prompt_embeds=pooled_prompt_embeds,
200 | negative_prompt_embeds=negative_prompt_embeds,
201 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
202 | generator=generator,
203 | strength=strength,
204 | num_inference_steps=steps,
205 | guidance_scale=params.guidance_scale,
206 | width=params.width,
207 | height=params.height,
208 | output_type="pil",
209 | )
210 |
211 | return results.images[0]
212 |
--------------------------------------------------------------------------------
/server/pipelines/img2imgSDXS512.py:
--------------------------------------------------------------------------------
1 | from diffusers import AutoPipelineForImage2Image, AutoencoderTiny
2 | from compel import Compel
3 | import torch
4 |
5 | try:
6 | import intel_extension_for_pytorch as ipex # type: ignore
7 | except ImportError:
8 | pass
9 |
10 | import psutil
11 | from config import Args
12 | from pydantic import BaseModel, Field
13 | from PIL import Image
14 | from util import ParamsModel
15 | import math
16 |
17 | base_model = "IDKiro/sdxs-512-0.9"
18 | taesd_model = "madebyollin/taesd"
19 |
20 | default_prompt = "Portrait of The Terminator with glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
21 | page_content = """
22 | Real-Time Latent SDXS
23 | Image-to-Image SDXS
24 |
25 | This demo showcases LCM Image to Image pipeline using
26 | Diffusers with a MJPEG stream server.
27 |
28 | Change the prompt to generate different images, accepts Compel syntax.
44 | """
45 |
46 |
47 | class Pipeline:
48 | class Info(BaseModel):
49 | name: str = "img2img"
50 | title: str = "Image-to-Image SDXS"
51 |         description: str = "Generates an image from an input image and a text prompt"
52 | input_mode: str = "image"
53 | page_content: str = page_content
54 |
55 | class InputParams(ParamsModel):
56 | prompt: str = Field(
57 | default_prompt,
58 | title="Prompt",
59 | field="textarea",
60 | id="prompt",
61 | )
62 | seed: int = Field(
63 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
64 | )
65 | steps: int = Field(
66 | 1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
67 | )
68 | width: int = Field(
69 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
70 | )
71 | height: int = Field(
72 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
73 | )
74 | guidance_scale: float = Field(
75 | 0.0,
76 | min=0,
77 | max=20,
78 | step=0.001,
79 | title="Guidance Scale",
80 | field="range",
81 | hide=True,
82 | id="guidance_scale",
83 | )
84 | strength: float = Field(
85 | 0.5,
86 | min=0.25,
87 | max=1.0,
88 | step=0.001,
89 | title="Strength",
90 | field="range",
91 | hide=True,
92 | id="strength",
93 | )
94 |
95 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
96 | self.pipe = AutoPipelineForImage2Image.from_pretrained(
97 | base_model,
98 | safety_checker=None,
99 | )
100 | if args.taesd:
101 | self.pipe.vae = AutoencoderTiny.from_pretrained(
102 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
103 | ).to(device)
104 |
105 | if args.sfast:
106 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
107 | compile,
108 | CompilationConfig,
109 | )
110 |
111 | config = CompilationConfig.Default()
112 | config.enable_xformers = True
113 | config.enable_triton = True
114 | config.enable_cuda_graph = True
115 | self.pipe = compile(self.pipe, config=config)
116 |
117 | self.pipe.set_progress_bar_config(disable=True)
118 | self.pipe.to(device=device, dtype=torch_dtype)
119 | if device.type != "mps":
120 | self.pipe.unet.to(memory_format=torch.channels_last)
121 |
122 | if args.torch_compile:
123 | print("Running torch compile")
124 | self.pipe.unet = torch.compile(
125 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
126 | )
127 | self.pipe.vae = torch.compile(
128 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
129 | )
130 |
131 | self.pipe(
132 | prompt="warmup",
133 |                 image=[Image.new("RGB", (512, 512))],
134 | )
135 |
136 | if args.compel:
137 | self.compel_proc = Compel(
138 | tokenizer=self.pipe.tokenizer,
139 | text_encoder=self.pipe.text_encoder,
140 | truncate_long_prompts=False,
141 | )
142 |
143 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
144 | generator = torch.manual_seed(params.seed)
145 | prompt_embeds = None
146 | prompt = params.prompt
147 | if hasattr(self, "compel_proc"):
148 | prompt_embeds = self.compel_proc(params.prompt)
149 | prompt = None
150 |
151 | results = self.pipe(
152 | image=params.image,
153 | prompt=prompt,
154 | prompt_embeds=prompt_embeds,
155 | generator=generator,
156 | strength=params.strength,
157 | num_inference_steps=params.steps,
158 | guidance_scale=params.guidance_scale,
159 | width=params.width,
160 | height=params.height,
161 | output_type="pil",
162 | )
163 | return results.images[0]
164 |
--------------------------------------------------------------------------------
/server/pipelines/img2imgSegmindVegaRT.py:
--------------------------------------------------------------------------------
1 | from diffusers import (
2 | AutoPipelineForImage2Image,
3 | LCMScheduler,
4 | AutoencoderTiny,
5 | )
6 | from compel import Compel, ReturnedEmbeddingsType
7 | import torch
8 |
9 | try:
10 | import intel_extension_for_pytorch as ipex # type: ignore
11 | except ImportError:
12 | pass
13 |
14 | import psutil
15 | from config import Args
16 | from pydantic import BaseModel, Field
17 | from util import ParamsModel
18 | from PIL import Image
19 | import math
20 |
21 | base_model = "segmind/Segmind-Vega"
22 | lora_model = "segmind/Segmind-VegaRT"
23 | taesd_model = "madebyollin/taesdxl"
24 |
25 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
26 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
27 | page_content = """
28 | Real-Time SegmindVegaRT
29 | Image-to-Image
30 |
31 | This demo showcases SegmindVegaRT Image to Image pipeline using
32 | Diffusers with a MJPEG stream server.
33 |
34 | Change the prompt to generate different images, accepts Compel syntax.
50 | """
51 |
52 |
53 | class Pipeline:
54 | class Info(BaseModel):
55 | name: str = "img2img"
56 |         title: str = "Image-to-Image SegmindVegaRT"
57 |         description: str = "Generates an image from an input image and a text prompt"
58 | input_mode: str = "image"
59 | page_content: str = page_content
60 |
61 | class InputParams(ParamsModel):
62 | prompt: str = Field(
63 | default_prompt,
64 | title="Prompt",
65 | field="textarea",
66 | id="prompt",
67 | )
68 | negative_prompt: str = Field(
69 | default_negative_prompt,
70 | title="Negative Prompt",
71 | field="textarea",
72 | id="negative_prompt",
73 | hide=True,
74 | )
75 | seed: int = Field(
76 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
77 | )
78 | steps: int = Field(
79 | 1, min=1, max=10, title="Steps", field="range", hide=True, id="steps"
80 | )
81 | width: int = Field(
82 | 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
83 | )
84 | height: int = Field(
85 | 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
86 | )
87 | guidance_scale: float = Field(
88 | 0.0,
89 | min=0,
90 | max=1,
91 | step=0.001,
92 | title="Guidance Scale",
93 | field="range",
94 | hide=True,
95 | id="guidance_scale",
96 | )
97 | strength: float = Field(
98 | 0.5,
99 | min=0.25,
100 | max=1.0,
101 | step=0.001,
102 | title="Strength",
103 | field="range",
104 | hide=True,
105 | id="strength",
106 | )
107 |
108 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
109 | self.pipe = AutoPipelineForImage2Image.from_pretrained(
110 | base_model,
111 | safety_checker=None,
112 | variant="fp16",
113 | )
114 | if args.taesd:
115 | self.pipe.vae = AutoencoderTiny.from_pretrained(
116 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
117 | ).to(device)
118 |
119 | self.pipe.load_lora_weights(lora_model)
120 | self.pipe.fuse_lora()
121 | self.pipe.scheduler = LCMScheduler.from_pretrained(
122 | base_model, subfolder="scheduler"
123 | )
124 | if args.sfast:
125 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
126 | compile,
127 | CompilationConfig,
128 | )
129 |
130 | config = CompilationConfig.Default()
131 | config.enable_xformers = True
132 | config.enable_triton = True
133 | config.enable_cuda_graph = True
134 | self.pipe = compile(self.pipe, config=config)
135 |
136 | self.pipe.set_progress_bar_config(disable=True)
137 | self.pipe.to(device=device, dtype=torch_dtype)
138 | if device.type != "mps":
139 | self.pipe.unet.to(memory_format=torch.channels_last)
140 |
141 | if args.torch_compile:
142 | print("Running torch compile")
143 | self.pipe.unet = torch.compile(
144 | self.pipe.unet, mode="reduce-overhead", fullgraph=False
145 | )
146 | self.pipe.vae = torch.compile(
147 | self.pipe.vae, mode="reduce-overhead", fullgraph=False
148 | )
149 |
150 | self.pipe(
151 | prompt="warmup",
152 |                 image=[Image.new("RGB", (1024, 1024))],
153 | )
154 | if args.compel:
155 | self.pipe.compel_proc = Compel(
156 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
157 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
158 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
159 | requires_pooled=[False, True],
160 | )
161 |
162 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
163 | generator = torch.manual_seed(params.seed)
164 | prompt = params.prompt
165 | negative_prompt = params.negative_prompt
166 | prompt_embeds = None
167 | pooled_prompt_embeds = None
168 | negative_prompt_embeds = None
169 | negative_pooled_prompt_embeds = None
170 | if hasattr(self.pipe, "compel_proc"):
171 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
172 | [params.prompt, params.negative_prompt]
173 | )
174 | prompt = None
175 | negative_prompt = None
176 | prompt_embeds = _prompt_embeds[0:1]
177 | pooled_prompt_embeds = pooled_prompt_embeds[0:1]
178 | negative_prompt_embeds = _prompt_embeds[1:2]
179 | negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
180 |
181 | steps = params.steps
182 | strength = params.strength
183 |         if int(steps * strength) < 1:  # ensure at least one effective denoising step at low strength
184 | steps = math.ceil(1 / max(0.10, strength))
185 |
186 | results = self.pipe(
187 | image=params.image,
188 | prompt=prompt,
189 | negative_prompt=negative_prompt,
190 | prompt_embeds=prompt_embeds,
191 | pooled_prompt_embeds=pooled_prompt_embeds,
192 | negative_prompt_embeds=negative_prompt_embeds,
193 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
194 | generator=generator,
195 | strength=strength,
196 | num_inference_steps=steps,
197 | guidance_scale=params.guidance_scale,
198 | width=params.width,
199 | height=params.height,
200 | output_type="pil",
201 | )
202 |
203 | return results.images[0]
204 |
--------------------------------------------------------------------------------
/server/pipelines/pix2pix/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/server/pipelines/pix2pix/__init__.py
--------------------------------------------------------------------------------
/server/pipelines/pix2pix/model.py:
--------------------------------------------------------------------------------
1 | # https://github.com/GaParmar/img2img-turbo/blob/main/src/model.py
2 | from diffusers import DDPMScheduler
3 |
4 |
5 | def make_1step_sched():
6 | noise_scheduler_1step = DDPMScheduler.from_pretrained(
7 | "stabilityai/sd-turbo", subfolder="scheduler"
8 | )
9 | noise_scheduler_1step.set_timesteps(1, device="cuda")
10 | noise_scheduler_1step.alphas_cumprod = noise_scheduler_1step.alphas_cumprod.cuda()
11 | return noise_scheduler_1step
12 |
13 |
14 | def my_vae_encoder_fwd(self, sample):
15 | sample = self.conv_in(sample)
16 | l_blocks = []
17 | # down
18 | for down_block in self.down_blocks:
19 | l_blocks.append(sample)
20 | sample = down_block(sample)
21 | # middle
22 | sample = self.mid_block(sample)
23 | sample = self.conv_norm_out(sample)
24 | sample = self.conv_act(sample)
25 | sample = self.conv_out(sample)
26 | self.current_down_blocks = l_blocks
27 | return sample
28 |
29 |
30 | def my_vae_decoder_fwd(self, sample, latent_embeds=None):
31 | sample = self.conv_in(sample)
32 | upscale_dtype = next(iter(self.up_blocks.parameters())).dtype
33 | # middle
34 | sample = self.mid_block(sample, latent_embeds)
35 | sample = sample.to(upscale_dtype)
36 | if not self.ignore_skip:
37 | skip_convs = [
38 | self.skip_conv_1,
39 | self.skip_conv_2,
40 | self.skip_conv_3,
41 | self.skip_conv_4,
42 | ]
43 | # up
44 | for idx, up_block in enumerate(self.up_blocks):
45 | skip_in = skip_convs[idx](self.incoming_skip_acts[::-1][idx] * self.gamma)
46 | # add skip
47 | sample = sample + skip_in
48 | sample = up_block(sample, latent_embeds)
49 | else:
50 | for idx, up_block in enumerate(self.up_blocks):
51 | sample = up_block(sample, latent_embeds)
52 | # post-process
53 | if latent_embeds is None:
54 | sample = self.conv_norm_out(sample)
55 | else:
56 | sample = self.conv_norm_out(sample, latent_embeds)
57 | sample = self.conv_act(sample)
58 | sample = self.conv_out(sample)
59 | return sample
60 |
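61 | if __name__ == "__main__":
62 |     # Smoke test (a sketch, not part of the original file): bind the custom
63 |     # forwards onto a Diffusers VAE and round-trip a random image, mirroring
64 |     # what pix2pix_turbo.py does at load time. Downloads the sd-turbo VAE.
65 |     import torch
66 |     from diffusers import AutoencoderKL
67 |
68 |     vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
69 |     vae.encoder.forward = my_vae_encoder_fwd.__get__(vae.encoder, vae.encoder.__class__)
70 |     vae.decoder.forward = my_vae_decoder_fwd.__get__(vae.decoder, vae.decoder.__class__)
71 |     vae.decoder.ignore_skip = True  # no skip convs are registered in this sketch
72 |     vae.decoder.gamma = 1.0
73 |     latents = vae.encode(torch.randn(1, 3, 256, 256)).latent_dist.sample()
74 |     print(vae.decode(latents).sample.shape)  # torch.Size([1, 3, 256, 256])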
--------------------------------------------------------------------------------
/server/pipelines/pix2pix/pix2pix_turbo.py:
--------------------------------------------------------------------------------
1 | # https://github.com/GaParmar/img2img-turbo/blob/main/src/pix2pix_turbo.py
2 | import os
3 | import requests
4 | import sys
5 | import pdb
6 | import copy
7 | from tqdm import tqdm
8 | import torch
9 | from transformers import AutoTokenizer, PretrainedConfig, CLIPTextModel
10 | from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
11 | from diffusers.utils.peft_utils import set_weights_and_activate_adapters
12 | from peft import LoraConfig
13 |
14 | from pipelines.pix2pix.model import (
15 | make_1step_sched,
16 | my_vae_encoder_fwd,
17 | my_vae_decoder_fwd,
18 | )
19 |
20 |
21 | class TwinConv(torch.nn.Module):
22 | def __init__(self, convin_pretrained, convin_curr):
23 | super(TwinConv, self).__init__()
24 | self.conv_in_pretrained = copy.deepcopy(convin_pretrained)
25 | self.conv_in_curr = copy.deepcopy(convin_curr)
26 | self.r = None
27 |
28 | def forward(self, x):
29 | x1 = self.conv_in_pretrained(x).detach()
30 | x2 = self.conv_in_curr(x)
31 | return x1 * (1 - self.r) + x2 * (self.r)
32 |
33 |
34 | class Pix2Pix_Turbo(torch.nn.Module):
35 | def __init__(self, name, ckpt_folder="checkpoints"):
36 | super().__init__()
37 | self.tokenizer = AutoTokenizer.from_pretrained(
38 | "stabilityai/sd-turbo", subfolder="tokenizer"
39 | )
40 | self.text_encoder = CLIPTextModel.from_pretrained(
41 | "stabilityai/sd-turbo", subfolder="text_encoder"
42 | ).cuda()
43 | self.sched = make_1step_sched()
44 |
45 | vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
46 | unet = UNet2DConditionModel.from_pretrained(
47 | "stabilityai/sd-turbo", subfolder="unet"
48 | )
49 |
50 | if name == "edge_to_image":
51 | url = "https://www.cs.cmu.edu/~img2img-turbo/models/edge_to_image_loras.pkl"
52 | os.makedirs(ckpt_folder, exist_ok=True)
53 | outf = os.path.join(ckpt_folder, "edge_to_image_loras.pkl")
54 | if not os.path.exists(outf):
55 | print(f"Downloading checkpoint to {outf}")
56 | response = requests.get(url, stream=True)
57 | total_size_in_bytes = int(response.headers.get("content-length", 0))
58 | block_size = 1024 # 1 Kibibyte
59 | progress_bar = tqdm(
60 | total=total_size_in_bytes, unit="iB", unit_scale=True
61 | )
62 | with open(outf, "wb") as file:
63 | for data in response.iter_content(block_size):
64 | progress_bar.update(len(data))
65 | file.write(data)
66 | progress_bar.close()
67 | if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
68 | print("ERROR, something went wrong")
69 | print(f"Downloaded successfully to {outf}")
70 | p_ckpt = outf
71 | sd = torch.load(p_ckpt, map_location="cpu")
72 | unet_lora_config = LoraConfig(
73 | r=sd["rank_unet"],
74 | init_lora_weights="gaussian",
75 | target_modules=sd["unet_lora_target_modules"],
76 | )
77 |
78 | if name == "sketch_to_image_stochastic":
79 | # download from url
80 | url = "https://www.cs.cmu.edu/~img2img-turbo/models/sketch_to_image_stochastic_lora.pkl"
81 | os.makedirs(ckpt_folder, exist_ok=True)
82 | outf = os.path.join(ckpt_folder, "sketch_to_image_stochastic_lora.pkl")
83 | if not os.path.exists(outf):
84 | print(f"Downloading checkpoint to {outf}")
85 | response = requests.get(url, stream=True)
86 | total_size_in_bytes = int(response.headers.get("content-length", 0))
87 | block_size = 1024 # 1 Kibibyte
88 | progress_bar = tqdm(
89 | total=total_size_in_bytes, unit="iB", unit_scale=True
90 | )
91 | with open(outf, "wb") as file:
92 | for data in response.iter_content(block_size):
93 | progress_bar.update(len(data))
94 | file.write(data)
95 | progress_bar.close()
96 | if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
97 | print("ERROR, something went wrong")
98 | print(f"Downloaded successfully to {outf}")
99 | p_ckpt = outf
100 | sd = torch.load(p_ckpt, map_location="cpu")
101 | unet_lora_config = LoraConfig(
102 | r=sd["rank_unet"],
103 | init_lora_weights="gaussian",
104 | target_modules=sd["unet_lora_target_modules"],
105 | )
106 | convin_pretrained = copy.deepcopy(unet.conv_in)
107 | unet.conv_in = TwinConv(convin_pretrained, unet.conv_in)
108 |
109 | vae.encoder.forward = my_vae_encoder_fwd.__get__(
110 | vae.encoder, vae.encoder.__class__
111 | )
112 | vae.decoder.forward = my_vae_decoder_fwd.__get__(
113 | vae.decoder, vae.decoder.__class__
114 | )
115 | # add the skip connection convs
116 | vae.decoder.skip_conv_1 = torch.nn.Conv2d(
117 | 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
118 | ).cuda()
119 | vae.decoder.skip_conv_2 = torch.nn.Conv2d(
120 | 256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
121 | ).cuda()
122 | vae.decoder.skip_conv_3 = torch.nn.Conv2d(
123 | 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
124 | ).cuda()
125 | vae.decoder.skip_conv_4 = torch.nn.Conv2d(
126 | 128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
127 | ).cuda()
128 | vae_lora_config = LoraConfig(
129 | r=sd["rank_vae"],
130 | init_lora_weights="gaussian",
131 | target_modules=sd["vae_lora_target_modules"],
132 | )
133 | vae.decoder.ignore_skip = False
134 | vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
135 | unet.add_adapter(unet_lora_config)
136 | _sd_unet = unet.state_dict()
137 | for k in sd["state_dict_unet"]:
138 | _sd_unet[k] = sd["state_dict_unet"][k]
139 | unet.load_state_dict(_sd_unet)
140 | unet.enable_xformers_memory_efficient_attention()
141 | _sd_vae = vae.state_dict()
142 | for k in sd["state_dict_vae"]:
143 | _sd_vae[k] = sd["state_dict_vae"][k]
144 | vae.load_state_dict(_sd_vae)
145 | unet.to("cuda")
146 | vae.to("cuda")
147 | unet.eval()
148 | vae.eval()
149 | self.unet, self.vae = unet, vae
150 | self.vae.decoder.gamma = 1
151 | self.timesteps = torch.tensor([999], device="cuda").long()
152 | self.last_prompt = ""
153 | self.caption_enc = None
154 | self.device = "cuda"
155 |
156 | @torch.no_grad()
157 | def forward(self, c_t, prompt, deterministic=True, r=1.0, noise_map=1.0):
158 | # encode the text prompt
159 | if prompt != self.last_prompt:
160 | caption_tokens = self.tokenizer(
161 | prompt,
162 | max_length=self.tokenizer.model_max_length,
163 | padding="max_length",
164 | truncation=True,
165 | return_tensors="pt",
166 | ).input_ids.cuda()
167 | caption_enc = self.text_encoder(caption_tokens)[0]
168 | self.caption_enc = caption_enc
169 | self.last_prompt = prompt
170 |
171 | if deterministic:
172 | encoded_control = (
173 | self.vae.encode(c_t).latent_dist.sample()
174 | * self.vae.config.scaling_factor
175 | )
176 | model_pred = self.unet(
177 | encoded_control,
178 | self.timesteps,
179 | encoder_hidden_states=self.caption_enc,
180 | ).sample
181 | x_denoised = self.sched.step(
182 | model_pred, self.timesteps, encoded_control, return_dict=True
183 | ).prev_sample
184 | self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
185 | output_image = (
186 | self.vae.decode(x_denoised / self.vae.config.scaling_factor).sample
187 | ).clamp(-1, 1)
188 | else:
189 | # scale the lora weights based on the r value
190 | self.unet.set_adapters(["default"], weights=[r])
191 | set_weights_and_activate_adapters(self.vae, ["vae_skip"], [r])
192 | encoded_control = (
193 | self.vae.encode(c_t).latent_dist.sample()
194 | * self.vae.config.scaling_factor
195 | )
196 | # combine the input and noise
197 | unet_input = encoded_control * r + noise_map * (1 - r)
198 | self.unet.conv_in.r = r
199 | unet_output = self.unet(
200 | unet_input,
201 | self.timesteps,
202 | encoder_hidden_states=self.caption_enc,
203 | ).sample
204 | self.unet.conv_in.r = None
205 | x_denoised = self.sched.step(
206 | unet_output, self.timesteps, unet_input, return_dict=True
207 | ).prev_sample
208 | self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
209 | self.vae.decoder.gamma = r
210 | output_image = (
211 | self.vae.decode(x_denoised / self.vae.config.scaling_factor).sample
212 | ).clamp(-1, 1)
213 | return output_image
214 |
--------------------------------------------------------------------------------
/server/pipelines/pix2pixTurbo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision import transforms
3 |
4 | from config import Args
5 | from pydantic import BaseModel, Field
6 | from util import ParamsModel
7 | from PIL import Image
8 | from pipelines.pix2pix.pix2pix_turbo import Pix2Pix_Turbo
9 | from pipelines.utils.canny_gpu import ScharrOperator
10 |
11 | default_prompt = "close-up photo of the joker"
12 | page_content = """
13 | Real-Time pix2pix_turbo
14 | pix2pix turbo
15 |
16 | This demo showcases One-Step Image Translation with Text-to-Image Models.
17 |
18 | Web app: Real-Time Latent Consistency Models
28 | """
29 |
30 |
31 | class Pipeline:
32 | class Info(BaseModel):
33 | name: str = "img2img"
34 |         title: str = "Image-to-Image pix2pix Turbo"
35 |         description: str = "Generates an image from an input image and a text prompt"
36 | input_mode: str = "image"
37 | page_content: str = page_content
38 |
39 | class InputParams(ParamsModel):
40 | prompt: str = Field(
41 | default_prompt,
42 | title="Prompt",
43 | field="textarea",
44 | id="prompt",
45 | )
46 |
47 | width: int = Field(
48 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
49 | )
50 | height: int = Field(
51 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
52 | )
53 | seed: int = Field(
54 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
55 | )
56 | noise_r: float = Field(
57 | 1.0,
58 | min=0.01,
59 | max=3.0,
60 | step=0.001,
61 | title="Noise R",
62 | field="range",
63 | hide=True,
64 | id="noise_r",
65 | )
66 |
67 | deterministic: bool = Field(
68 | True,
69 | hide=True,
70 | title="Deterministic",
71 | field="checkbox",
72 | id="deterministic",
73 | )
74 | canny_low_threshold: float = Field(
75 | 0.0,
76 | min=0,
77 | max=1.0,
78 | step=0.001,
79 | title="Canny Low Threshold",
80 | field="range",
81 | hide=True,
82 | id="canny_low_threshold",
83 | )
84 | canny_high_threshold: float = Field(
85 | 1.0,
86 | min=0,
87 | max=1.0,
88 | step=0.001,
89 | title="Canny High Threshold",
90 | field="range",
91 | hide=True,
92 | id="canny_high_threshold",
93 | )
94 | debug_canny: bool = Field(
95 | False,
96 | title="Debug Canny",
97 | field="checkbox",
98 | hide=True,
99 | id="debug_canny",
100 | )
101 |
102 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
103 | self.model = Pix2Pix_Turbo("edge_to_image")
104 | self.canny_torch = ScharrOperator(device=device)
105 | self.device = device
106 | self.last_time = 0.0
107 |
108 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
109 | canny_pil, canny_tensor = self.canny_torch(
110 | params.image,
111 | params.canny_low_threshold,
112 | params.canny_high_threshold,
113 | output_type="pil,tensor",
114 | )
115 | torch.manual_seed(params.seed)
116 | noise = torch.randn(
117 | (1, 4, params.width // 8, params.height // 8), device=self.device
118 | )
119 | canny_tensor = torch.cat((canny_tensor, canny_tensor, canny_tensor), dim=1)
120 | output_image = self.model(
121 | canny_tensor,
122 | params.prompt,
123 | params.deterministic,
124 | params.noise_r,
125 | noise,
126 | )
127 | output_pil = transforms.ToPILImage()(output_image[0].cpu() * 0.5 + 0.5)
128 |
129 | result_image = output_pil
130 | if params.debug_canny:
131 | # paste control_image on top of result_image
132 | w0, h0 = (200, 200)
133 | control_image = canny_pil.resize((w0, h0))
134 | w1, h1 = result_image.size
135 | result_image.paste(control_image, (w1 - w0, h1 - h0))
136 | return result_image
137 |
--------------------------------------------------------------------------------
/server/pipelines/txt2img.py:
--------------------------------------------------------------------------------
1 | from diffusers import DiffusionPipeline, AutoencoderTiny
2 | from compel import Compel
3 | import torch
4 |
5 | try:
6 | import intel_extension_for_pytorch as ipex # type: ignore
7 | except ImportError:
8 | pass
9 |
10 | from config import Args
11 | from pydantic import BaseModel, Field
12 | from util import ParamsModel
13 | from PIL import Image
14 | from typing import List
15 | from pruna import SmashConfig, smash
16 |
17 | base_model = "SimianLuo/LCM_Dreamshaper_v7"
18 | taesd_model = "madebyollin/taesd"
19 |
20 | default_prompt = "Portrait of The Terminator with glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
21 |
22 | page_content = """Real-Time Latent Consistency Model
23 | Text-to-Image
24 |
25 | This demo showcases LCM Image to Image pipeline using
26 | Diffusers with a MJPEG stream server
27 |
28 | Change the prompt to generate different images, accepts Compel syntax.
42 | """
43 |
44 |
45 | class Pipeline:
46 | class Info(BaseModel):
47 | name: str = "txt2img"
48 | title: str = "Text-to-Image LCM"
49 | description: str = "Generates an image from a text prompt"
50 | input_mode: str = "text"
51 | page_content: str = page_content
52 |
53 | class InputParams(ParamsModel):
54 | prompt: str = Field(
55 | default_prompt,
56 | title="Prompt",
57 | field="textarea",
58 | id="prompt",
59 | )
60 | seed: int = Field(
61 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
62 | )
63 | steps: int = Field(
64 | 4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
65 | )
66 | width: int = Field(
67 | 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
68 | )
69 | height: int = Field(
70 | 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
71 | )
72 | guidance_scale: float = Field(
73 | 8.0,
74 | min=1,
75 | max=30,
76 | step=0.001,
77 | title="Guidance Scale",
78 | field="range",
79 | hide=True,
80 | id="guidance_scale",
81 | )
82 |
83 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
84 | self.pipe = DiffusionPipeline.from_pretrained(base_model, safety_checker=None)
85 | if args.taesd:
86 | self.pipe.vae = AutoencoderTiny.from_pretrained(
87 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
88 | ).to(device)
89 |
90 | if args.pruna:
91 | # Create and smash your model
92 | smash_config = SmashConfig()
93 | # smash_config["cacher"] = "deepcache"
94 | smash_config["compiler"] = "stable_fast"
95 | self.pipe = smash(model=self.pipe, smash_config=smash_config)
96 |
97 | if args.sfast:
98 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
99 | compile,
100 | CompilationConfig,
101 | )
102 |
103 | config = CompilationConfig.Default()
104 | config.enable_xformers = True
105 | config.enable_triton = True
106 | config.enable_cuda_graph = True
107 | self.pipe = compile(self.pipe, config=config)
108 |
109 | self.pipe.set_progress_bar_config(disable=True)
110 | self.pipe.to(device=device, dtype=torch_dtype)
111 | if device.type != "mps":
112 | self.pipe.unet.to(memory_format=torch.channels_last)
113 |
114 | if args.torch_compile:
115 | self.pipe.unet = torch.compile(
116 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
117 | )
118 | self.pipe.vae = torch.compile(
119 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
120 | )
121 |
122 | self.pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
123 |
124 | if args.compel:
125 | self.compel_proc = Compel(
126 | tokenizer=self.pipe.tokenizer,
127 | text_encoder=self.pipe.text_encoder,
128 | truncate_long_prompts=False,
129 | )
130 |
131 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
132 | generator = torch.manual_seed(params.seed)
133 | prompt_embeds = None
134 | prompt = params.prompt
135 | if hasattr(self, "compel_proc"):
136 | prompt_embeds = self.compel_proc(params.prompt)
137 | prompt = None
138 |
139 | results = self.pipe(
140 | prompt_embeds=prompt_embeds,
141 | prompt=prompt,
142 | generator=generator,
143 | num_inference_steps=params.steps,
144 | guidance_scale=params.guidance_scale,
145 | width=params.width,
146 | height=params.height,
147 | output_type="pil",
148 | )
149 |
150 | return results.images[0]
151 |
--------------------------------------------------------------------------------
/server/pipelines/txt2imgLora.py:
--------------------------------------------------------------------------------
1 | from diffusers import DiffusionPipeline, AutoencoderTiny, LCMScheduler
2 | from compel import Compel
3 | import torch
4 |
5 | try:
6 | import intel_extension_for_pytorch as ipex # type: ignore
7 | except ImportError:
8 | pass
9 |
10 | import psutil
11 | from config import Args
12 | from pydantic import BaseModel, Field
13 | from util import ParamsModel
14 | from PIL import Image
15 | from pruna import SmashConfig, smash
16 |
17 | base_model = "wavymulder/Analog-Diffusion"
18 | lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
19 | taesd_model = "madebyollin/taesd"
20 |
21 | default_prompt = "Analog style photograph of young Harrison Ford as Han Solo, star wars behind the scenes"
22 |
23 | page_content = """
24 | Real-Time Latent Consistency Model SDv1.5
25 | Text-to-Image LCM + LoRa
26 |
27 | This demo showcases LCM Image to Image pipeline using
28 | Diffusers with a MJPEG stream server. Featuring Analog-Diffusion
29 |
30 | Change the prompt to generate different images, accepts Compel syntax.
49 | """
50 |
51 |
52 | class Pipeline:
53 | class Info(BaseModel):
54 |         name: str = "txt2img"
55 | title: str = "Text-to-Image LCM + LoRa"
56 | description: str = "Generates an image from a text prompt"
57 | input_mode: str = "text"
58 | page_content: str = page_content
59 |
60 | class InputParams(ParamsModel):
61 | prompt: str = Field(
62 | default_prompt,
63 | title="Prompt",
64 | field="textarea",
65 | id="prompt",
66 | )
67 | seed: int = Field(
68 | 8638236174640251, min=0, title="Seed", field="seed", hide=True, id="seed"
69 | )
70 | steps: int = Field(
71 | 4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
72 | )
73 | width: int = Field(
74 | 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
75 | )
76 | height: int = Field(
77 | 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
78 | )
79 | guidance_scale: float = Field(
80 | 0.2,
81 | min=0,
82 | max=4,
83 | step=0.001,
84 | title="Guidance Scale",
85 | field="range",
86 | hide=True,
87 | id="guidance_scale",
88 | )
89 |
90 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
91 | self.pipe = DiffusionPipeline.from_pretrained(base_model, safety_checker=None)
92 | if args.taesd:
93 | self.pipe.vae = AutoencoderTiny.from_pretrained(
94 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
95 | ).to(device)
96 |
97 | if args.pruna:
98 | # Create and smash your model
99 | smash_config = SmashConfig()
100 | # smash_config["cacher"] = "deepcache"
101 | smash_config["compiler"] = "stable_fast"
102 | self.pipe = smash(model=self.pipe, smash_config=smash_config)
103 |
104 | self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
105 | self.pipe.set_progress_bar_config(disable=True)
106 | self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
107 | self.pipe.to(device=device, dtype=torch_dtype)
108 |
109 | if device.type != "mps":
110 | self.pipe.unet.to(memory_format=torch.channels_last)
111 |
112 | if args.torch_compile:
113 | self.pipe.unet = torch.compile(
114 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
115 | )
116 | self.pipe.vae = torch.compile(
117 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
118 | )
119 |
120 | self.pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
121 |
122 | if args.sfast:
123 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
124 | compile,
125 | CompilationConfig,
126 | )
127 |
128 | config = CompilationConfig.Default()
129 | config.enable_xformers = True
130 | config.enable_triton = True
131 | config.enable_cuda_graph = True
132 | self.pipe = compile(self.pipe, config=config)
133 |
134 | if args.compel:
135 | self.compel_proc = Compel(
136 | tokenizer=self.pipe.tokenizer,
137 | text_encoder=self.pipe.text_encoder,
138 | truncate_long_prompts=False,
139 | )
140 |
141 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
142 | generator = torch.manual_seed(params.seed)
143 | prompt_embeds = None
144 | prompt = params.prompt
145 | if hasattr(self, "compel_proc"):
146 | prompt_embeds = self.compel_proc(params.prompt)
147 | prompt = None
148 |
149 | results = self.pipe(
150 | prompt=prompt,
151 | prompt_embeds=prompt_embeds,
152 | generator=generator,
153 | num_inference_steps=params.steps,
154 | guidance_scale=params.guidance_scale,
155 | width=params.width,
156 | height=params.height,
157 | output_type="pil",
158 | )
159 |
160 | return results.images[0]
161 |
--------------------------------------------------------------------------------
/server/pipelines/txt2imgLoraSDXL.py:
--------------------------------------------------------------------------------
1 | from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderKL, AutoencoderTiny
2 | from compel import Compel, ReturnedEmbeddingsType
3 | import torch
4 |
5 | try:
6 | import intel_extension_for_pytorch as ipex # type: ignore
7 | except ImportError:
8 | pass
9 |
10 | import psutil
11 | from config import Args
12 | from pydantic import BaseModel, Field
13 | from util import ParamsModel
14 | from PIL import Image
15 |
16 | model_id = "stabilityai/stable-diffusion-xl-base-1.0"
17 | lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
18 | taesd_model = "madebyollin/taesdxl"
19 |
20 |
21 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
22 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
23 | page_content = """
24 | Real-Time Latent Consistency Model
25 | Text-to-Image SDXL + LCM + LoRA
26 |
27 | This demo showcases LCM LoRA Text to Image pipeline using
28 | Diffusers with a MJPEG stream server.
29 |
30 | Change the prompt to generate different images, accepts Compel syntax.
47 | """
48 |
49 |
50 | class Pipeline:
51 | class Info(BaseModel):
52 | name: str = "LCM+Lora+SDXL"
53 | title: str = "Text-to-Image SDXL + LCM + LoRA"
54 | description: str = "Generates an image from a text prompt"
55 | page_content: str = page_content
56 | input_mode: str = "text"
57 |
58 | class InputParams(ParamsModel):
59 | prompt: str = Field(
60 | default_prompt,
61 | title="Prompt",
62 | field="textarea",
63 | id="prompt",
64 | )
65 | negative_prompt: str = Field(
66 | default_negative_prompt,
67 | title="Negative Prompt",
68 | field="textarea",
69 | id="negative_prompt",
70 | hide=True,
71 | )
72 | seed: int = Field(
73 | 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
74 | )
75 | steps: int = Field(
76 | 4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
77 | )
78 | width: int = Field(
79 | 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
80 | )
81 | height: int = Field(
82 | 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
83 | )
84 | guidance_scale: float = Field(
85 | 1.0,
86 | min=0,
87 | max=20,
88 | step=0.001,
89 | title="Guidance Scale",
90 | field="range",
91 | hide=True,
92 | id="guidance_scale",
93 | )
94 |
95 | def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
96 | vae = AutoencoderKL.from_pretrained(
97 | "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
98 | )
99 |
100 | self.pipe = DiffusionPipeline.from_pretrained(
101 | model_id,
102 | safety_checker=None,
103 | vae=vae,
104 | )
105 | # Load LCM LoRA
106 | self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
107 | self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
108 | self.pipe.set_progress_bar_config(disable=True)
109 |         self.pipe.to(device=device, dtype=torch_dtype)
110 |
111 | if args.sfast:
112 | from sfast.compilers.stable_diffusion_pipeline_compiler import (
113 | compile,
114 | CompilationConfig,
115 | )
116 |
117 | config = CompilationConfig.Default()
118 | config.enable_xformers = True
119 | config.enable_triton = True
120 | config.enable_cuda_graph = True
121 | self.pipe = compile(self.pipe, config=config)
122 |
123 | if device.type != "mps":
124 | self.pipe.unet.to(memory_format=torch.channels_last)
125 |
126 | self.pipe.compel_proc = Compel(
127 | tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
128 | text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
129 | returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
130 | requires_pooled=[False, True],
131 | )
132 | if args.taesd:
133 | self.pipe.vae = AutoencoderTiny.from_pretrained(
134 | taesd_model, torch_dtype=torch_dtype, use_safetensors=True
135 | ).to(device)
136 |
137 | if args.torch_compile:
138 | self.pipe.unet = torch.compile(
139 | self.pipe.unet, mode="reduce-overhead", fullgraph=True
140 | )
141 | self.pipe.vae = torch.compile(
142 | self.pipe.vae, mode="reduce-overhead", fullgraph=True
143 | )
144 | self.pipe(
145 | prompt="warmup",
146 | )
147 |
148 | def predict(self, params: "Pipeline.InputParams") -> Image.Image:
149 | generator = torch.manual_seed(params.seed)
150 |
151 | prompt = params.prompt
152 | negative_prompt = params.negative_prompt
153 | prompt_embeds = None
154 | pooled_prompt_embeds = None
155 | negative_prompt_embeds = None
156 | negative_pooled_prompt_embeds = None
157 | if hasattr(self.pipe, "compel_proc"):
158 | _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
159 | [params.prompt, params.negative_prompt]
160 | )
161 | prompt = None
162 | negative_prompt = None
163 | prompt_embeds = _prompt_embeds[0:1]
164 | pooled_prompt_embeds = pooled_prompt_embeds[0:1]
165 | negative_prompt_embeds = _prompt_embeds[1:2]
166 | negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
167 |
168 | results = self.pipe(
169 | prompt=prompt,
170 | negative_prompt=negative_prompt,
171 | prompt_embeds=prompt_embeds,
172 | pooled_prompt_embeds=pooled_prompt_embeds,
173 | negative_prompt_embeds=negative_prompt_embeds,
174 | negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
175 | generator=generator,
176 | num_inference_steps=params.steps,
177 | guidance_scale=params.guidance_scale,
178 | width=params.width,
179 | height=params.height,
180 | output_type="pil",
181 | )
182 |
183 | return results.images[0]
184 |
--------------------------------------------------------------------------------
/server/pipelines/utils/canny_gpu.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torchvision.transforms import ToTensor, ToPILImage
4 | from PIL import Image
5 |
6 |
7 | class SobelOperator(nn.Module):
8 | SOBEL_KERNEL_X = torch.tensor(
9 | [[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]]
10 | )
11 | SOBEL_KERNEL_Y = torch.tensor(
12 | [[-1.0, -2.0, -1.0], [0.0, 0.0, 0.0], [1.0, 2.0, 1.0]]
13 | )
14 |
15 | def __init__(self, device="cuda"):
16 | super(SobelOperator, self).__init__()
17 | self.device = device
18 | self.edge_conv_x = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
19 | self.device
20 | )
21 | self.edge_conv_y = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
22 | self.device
23 | )
24 | self.edge_conv_x.weight = nn.Parameter(
25 | self.SOBEL_KERNEL_X.view((1, 1, 3, 3)).to(self.device)
26 | )
27 | self.edge_conv_y.weight = nn.Parameter(
28 | self.SOBEL_KERNEL_Y.view((1, 1, 3, 3)).to(self.device)
29 | )
30 |
31 | @torch.no_grad()
32 | def forward(
33 | self,
34 | image: Image.Image,
35 | low_threshold: float,
36 | high_threshold: float,
37 | output_type="pil",
38 | ) -> Image.Image | torch.Tensor | tuple[Image.Image, torch.Tensor]:
39 | # Convert PIL image to PyTorch tensor
40 | image_gray = image.convert("L")
41 | image_tensor = ToTensor()(image_gray).unsqueeze(0).to(self.device)
42 |
43 | # Compute gradients
44 | edge_x = self.edge_conv_x(image_tensor)
45 | edge_y = self.edge_conv_y(image_tensor)
46 | edge = torch.sqrt(torch.square(edge_x) + torch.square(edge_y))
47 |
48 | # Apply thresholding
49 | edge.div_(edge.max()) # Normalize to 0-1 (in-place operation)
50 | edge[edge >= high_threshold] = 1.0
51 | edge[edge <= low_threshold] = 0.0
52 |
53 | # Convert the result back to a PIL image
54 | if output_type == "pil":
55 | return ToPILImage()(edge.squeeze(0).cpu())
56 | elif output_type == "tensor":
57 | return edge
58 | elif output_type == "pil,tensor":
59 | return ToPILImage()(edge.squeeze(0).cpu()), edge
60 |
61 |
62 | class ScharrOperator(nn.Module):
63 | SCHARR_KERNEL_X = torch.tensor(
64 | [[-3.0, 0.0, 3.0], [-10.0, 0.0, 10.0], [-3.0, 0.0, 3.0]]
65 | )
66 | SCHARR_KERNEL_Y = torch.tensor(
67 | [[-3.0, -10.0, -3.0], [0.0, 0.0, 0.0], [3.0, 10.0, 3.0]]
68 | )
69 |
70 | def __init__(self, device="cuda"):
71 | super(ScharrOperator, self).__init__()
72 | self.device = device
73 | self.edge_conv_x = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
74 | self.device
75 | )
76 | self.edge_conv_y = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
77 | self.device
78 | )
79 | self.edge_conv_x.weight = nn.Parameter(
80 | self.SCHARR_KERNEL_X.view((1, 1, 3, 3)).to(self.device)
81 | )
82 | self.edge_conv_y.weight = nn.Parameter(
83 | self.SCHARR_KERNEL_Y.view((1, 1, 3, 3)).to(self.device)
84 | )
85 |
86 | @torch.no_grad()
87 | def forward(
88 | self,
89 | image: Image.Image,
90 | low_threshold: float,
91 | high_threshold: float,
92 | output_type="pil",
93 | invert: bool = False,
94 | ) -> Image.Image | torch.Tensor | tuple[Image.Image, torch.Tensor]:
95 | # Convert PIL image to PyTorch tensor
96 | image_gray = image.convert("L")
97 | image_tensor = ToTensor()(image_gray).unsqueeze(0).to(self.device)
98 |
99 | # Compute gradients
100 | edge_x = self.edge_conv_x(image_tensor)
101 | edge_y = self.edge_conv_y(image_tensor)
102 | edge = torch.abs(edge_x) + torch.abs(edge_y)
103 |
104 | # Apply thresholding
105 | edge.div_(edge.max()) # Normalize to 0-1 (in-place operation)
106 | edge[edge >= high_threshold] = 1.0
107 | edge[edge <= low_threshold] = 0.0
108 | if invert:
109 | edge = 1 - edge
110 |
111 | # Convert the result back to a PIL image
112 | if output_type == "pil":
113 | return ToPILImage()(edge.squeeze(0).cpu())
114 | elif output_type == "tensor":
115 | return edge
116 | elif output_type == "pil,tensor":
117 | return ToPILImage()(edge.squeeze(0).cpu()), edge
118 |
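119 | if __name__ == "__main__":
120 |     # Smoke test (a sketch, not part of the original file): run the Scharr edge
121 |     # detector on CPU against a synthetic image; the thresholds are arbitrary
122 |     # example values.
123 |     test_image = Image.new("L", (64, 64), 0)
124 |     test_image.paste(255, (16, 16, 48, 48))  # white square so edges exist
125 |     edges = ScharrOperator(device="cpu")(
126 |         test_image, low_threshold=0.1, high_threshold=0.8, output_type="pil"
127 |     )
128 |     print(edges.size)  # (64, 64)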
--------------------------------------------------------------------------------
/server/pipelines/utils/safety_checker.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import torch
16 | import torch.nn as nn
17 | from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel
18 | from PIL import Image
19 |
20 |
21 | def cosine_distance(image_embeds, text_embeds):
22 | normalized_image_embeds = nn.functional.normalize(image_embeds)
23 | normalized_text_embeds = nn.functional.normalize(text_embeds)
24 | return torch.mm(normalized_image_embeds, normalized_text_embeds.t())
25 |
26 |
27 | class StableDiffusionSafetyChecker(PreTrainedModel):
28 | config_class = CLIPConfig
29 |
30 | _no_split_modules = ["CLIPEncoderLayer"]
31 |
32 | def __init__(self, config: CLIPConfig):
33 | super().__init__(config)
34 |
35 | self.vision_model = CLIPVisionModel(config.vision_config)
36 | self.visual_projection = nn.Linear(
37 | config.vision_config.hidden_size, config.projection_dim, bias=False
38 | )
39 |
40 | self.concept_embeds = nn.Parameter(
41 | torch.ones(17, config.projection_dim), requires_grad=False
42 | )
43 | self.special_care_embeds = nn.Parameter(
44 | torch.ones(3, config.projection_dim), requires_grad=False
45 | )
46 |
47 | self.concept_embeds_weights = nn.Parameter(torch.ones(17), requires_grad=False)
48 | self.special_care_embeds_weights = nn.Parameter(
49 | torch.ones(3), requires_grad=False
50 | )
51 |
52 | @torch.no_grad()
53 | def forward(self, clip_input, images):
54 | pooled_output = self.vision_model(clip_input)[1] # pooled_output
55 | image_embeds = self.visual_projection(pooled_output)
56 |
57 | # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
58 | special_cos_dist = (
59 | cosine_distance(image_embeds, self.special_care_embeds)
60 | .cpu()
61 | .float()
62 | .numpy()
63 | )
64 | cos_dist = (
65 | cosine_distance(image_embeds, self.concept_embeds).cpu().float().numpy()
66 | )
67 |
68 | result = []
69 | batch_size = image_embeds.shape[0]
70 | for i in range(batch_size):
71 | result_img = {
72 | "special_scores": {},
73 | "special_care": [],
74 | "concept_scores": {},
75 | "bad_concepts": [],
76 | }
77 |
78 |             # increase this value to create a stronger `nsfw` filter
79 | # at the cost of increasing the possibility of filtering benign images
80 | adjustment = 0.0
81 |
82 | for concept_idx in range(len(special_cos_dist[0])):
83 | concept_cos = special_cos_dist[i][concept_idx]
84 | concept_threshold = self.special_care_embeds_weights[concept_idx].item()
85 | result_img["special_scores"][concept_idx] = round(
86 | concept_cos - concept_threshold + adjustment, 3
87 | )
88 | if result_img["special_scores"][concept_idx] > 0:
89 | result_img["special_care"].append(
90 | {concept_idx, result_img["special_scores"][concept_idx]}
91 | )
92 | adjustment = 0.01
93 |
94 | for concept_idx in range(len(cos_dist[0])):
95 | concept_cos = cos_dist[i][concept_idx]
96 | concept_threshold = self.concept_embeds_weights[concept_idx].item()
97 | result_img["concept_scores"][concept_idx] = round(
98 | concept_cos - concept_threshold + adjustment, 3
99 | )
100 | if result_img["concept_scores"][concept_idx] > 0:
101 | result_img["bad_concepts"].append(concept_idx)
102 |
103 | result.append(result_img)
104 |
105 | has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result]
106 |
107 | return has_nsfw_concepts
108 |
109 | @torch.no_grad()
110 | def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor):
111 | pooled_output = self.vision_model(clip_input)[1] # pooled_output
112 | image_embeds = self.visual_projection(pooled_output)
113 |
114 | special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds)
115 | cos_dist = cosine_distance(image_embeds, self.concept_embeds)
116 |
117 | # increase this value to create a stronger `nsfw` filter
118 | # at the cost of increasing the possibility of filtering benign images
119 | adjustment = 0.0
120 |
121 | special_scores = (
122 | special_cos_dist - self.special_care_embeds_weights + adjustment
123 | )
124 | # special_scores = special_scores.round(decimals=3)
125 | special_care = torch.any(special_scores > 0, dim=1)
126 | special_adjustment = special_care * 0.01
127 | special_adjustment = special_adjustment.unsqueeze(1).expand(
128 | -1, cos_dist.shape[1]
129 | )
130 |
131 | concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment
132 | # concept_scores = concept_scores.round(decimals=3)
133 | has_nsfw_concepts = torch.any(concept_scores > 0, dim=1)
134 |
135 | images[has_nsfw_concepts] = 0.0 # black image
136 |
137 | return images, has_nsfw_concepts
138 |
139 |
140 | class SafetyChecker:
141 | def __init__(self, device="cuda"):
142 | from transformers import CLIPFeatureExtractor
143 |
144 | self.device = device
145 | self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
146 | "CompVis/stable-diffusion-safety-checker"
147 | ).to(device)
148 | self.feature_extractor = CLIPFeatureExtractor.from_pretrained(
149 | "openai/clip-vit-base-patch32"
150 | )
151 |
152 | def __call__(
153 | self, images: list[Image.Image] | Image.Image
154 | ) -> tuple[list[Image.Image], list[bool]] | tuple[Image.Image, bool]:
155 | images_list = [images] if isinstance(images, Image.Image) else images
156 |
157 | safety_checker_input = self.feature_extractor(
158 | images_list, return_tensors="pt"
159 | ).to(self.device)
160 |
161 | has_nsfw_concepts = self.safety_checker(
162 | images=[images_list],
163 | clip_input=safety_checker_input.pixel_values.to(self.device),
164 | )
165 |
166 | if isinstance(images, Image.Image):
167 | return images, has_nsfw_concepts[0]
168 |
169 | return images, has_nsfw_concepts
170 |
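171 | if __name__ == "__main__":
172 |     # Usage sketch (not part of the original file): screen a single PIL image.
173 |     # Assumes a CUDA device and network access to download the CompVis
174 |     # safety-checker weights on first use.
175 |     checker = SafetyChecker(device="cuda")
176 |     image, has_nsfw = checker(Image.new("RGB", (512, 512), "gray"))
177 |     print(has_nsfw)  # expected: False for a plain gray image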
--------------------------------------------------------------------------------
/server/requirements.txt:
--------------------------------------------------------------------------------
1 | # Use with: uv pip install --no-cache --system --index-strategy=unsafe-best-match -r requirements.txt
2 | numpy
3 | diffusers<=0.33.1
4 | llvmlite>=0.39.0
5 | numba>=0.56.0
6 | transformers
7 | pydantic
8 | huggingface-hub
9 | hf_transfer
10 | fastapi
11 | uvicorn[standard]
12 | Pillow==11.0.0
13 | accelerate
14 | compel==2.0.2
15 | controlnet-aux==0.0.9
16 | peft
17 | markdown2
18 | safetensors
19 | setuptools
20 | mpmath
21 | controlnet-aux
22 | sentencepiece==0.2.0
23 | optimum-quanto # has to be optimum-quanto==0.2.5 for pruna int4
24 | gguf
25 | types-Pillow
26 | mypy
27 | python-dotenv
28 | requests>=2.31.0 # Added explicitly to resolve dependency conflict
29 |
30 | --extra-index-url https://download.pytorch.org/whl/cu118
31 | torch==2.5.1
32 | torchvision
33 | torchaudio
34 | xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
35 | pruna[stable-fast] ; sys_platform != 'darwin' or platform_machine != 'arm64'
36 |
37 | # stable_fast @ https://github.com/chengzeyi/stable-fast/releases/download/nightly/stable_fast-1.0.5.dev20241127+torch230cu121-cp310-cp310-manylinux2014_x86_64.whl ; sys_platform != 'darwin' or platform_machine != 'arm64'
38 | #oneflow @ https://github.com/siliconflow/oneflow_releases/releases/download/community_cu121/oneflow-0.9.1.dev20241114%2Bcu121-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl ; sys_platform != 'darwin' or platform_machine != 'arm64'
39 | #onediff @ git+https://github.com/siliconflow/onediff.git@main#egg=onediff ; sys_platform != 'darwin' or platform_machine != 'arm64'
--------------------------------------------------------------------------------
/server/util.py:
--------------------------------------------------------------------------------
1 | from importlib import import_module
2 | from typing import Any, TypeVar
3 | from PIL import Image
4 | import io
5 | from pydantic import BaseModel
6 |
7 |
8 | # Used only for type checking the pipeline class
9 | TPipeline = TypeVar("TPipeline", bound=type[Any])
10 |
11 |
12 | class ParamsModel(BaseModel):
13 | """Base model for pipeline parameters."""
14 |
15 | model_config = {
16 | "arbitrary_types_allowed": True,
17 | "extra": "allow", # Allow extra attributes for dynamic fields like 'image'
18 | }
19 |
20 | @classmethod
21 | def from_dict(cls, data: dict[str, Any]) -> "ParamsModel":
22 | """Create a model instance from dictionary data."""
23 | return cls.model_validate(data)
24 |
25 | def to_dict(self) -> dict[str, Any]:
26 | """Convert model to dictionary."""
27 | return self.model_dump()
28 |
29 |
30 | def get_pipeline_class(pipeline_name: str) -> type:
31 | """
32 | Dynamically imports and returns the Pipeline class from a specified module.
33 |
34 | Args:
35 | pipeline_name: The name of the pipeline module to import
36 |
37 | Returns:
38 | The Pipeline class from the specified module
39 |
40 | Raises:
41 | ValueError: If the module or Pipeline class isn't found
42 | TypeError: If Pipeline is not a class
43 | """
44 | try:
45 | module = import_module(f"pipelines.{pipeline_name}")
46 | except ModuleNotFoundError:
47 | raise ValueError(f"Pipeline {pipeline_name} module not found")
48 |
49 | pipeline_class = getattr(module, "Pipeline", None)
50 |
51 | if pipeline_class is None:
52 | raise ValueError(f"'Pipeline' class not found in module '{pipeline_name}'.")
53 |
54 | # Type check to ensure we're returning a class
55 | if not isinstance(pipeline_class, type):
56 | raise TypeError(f"'Pipeline' in module '{pipeline_name}' is not a class")
57 |
58 | return pipeline_class
59 |
60 |
61 | def bytes_to_pil(image_bytes: bytes) -> Image.Image:
62 | image = Image.open(io.BytesIO(image_bytes))
63 | return image
64 |
65 |
66 | def pil_to_frame(image: Image.Image) -> bytes:
67 | frame_data = io.BytesIO()
68 | image.save(frame_data, format="JPEG", quality=80, optimize=True, progressive=True)
69 | frame_data = frame_data.getvalue()
70 | return (
71 | b"--frame\r\n"
72 | + b"Content-Type: image/jpeg\r\n"
73 | + f"Content-Length: {len(frame_data)}\r\n\r\n".encode()
74 | + frame_data
75 | + b"\r\n"
76 | )
77 |
78 |
79 | def is_firefox(user_agent: str) -> bool:
80 | return "Firefox" in user_agent
81 |
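82 | if __name__ == "__main__":
83 |     # Usage sketch (not part of the original file): build a params object and
84 |     # resolve a pipeline class by name. "txt2img" is only an example; the named
85 |     # module under server/pipelines/ must be importable (its own dependencies
86 |     # installed) for get_pipeline_class to succeed.
87 |     params = ParamsModel.from_dict({"prompt": "a cat", "seed": 1234})
88 |     print(params.to_dict())
89 |     pipeline_class = get_pipeline_class("txt2img")
90 |     print(pipeline_class.Info().title)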
--------------------------------------------------------------------------------