├── .gitattributes
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── build-run.sh
├── frontend
    ├── .gitignore
    ├── .npmrc
    ├── .nvmrc
    ├── .prettierignore
    ├── .prettierrc
    ├── README.md
    ├── package-lock.json
    ├── package.json
    ├── src
    │   ├── app.css
    │   ├── app.d.ts
    │   ├── app.html
    │   ├── lib
    │   │   ├── components
    │   │   │   ├── AspectRatioSelect.svelte
    │   │   │   ├── Button.svelte
    │   │   │   ├── Checkbox.svelte
    │   │   │   ├── ImagePlayer.svelte
    │   │   │   ├── InputRange.svelte
    │   │   │   ├── MediaListSwitcher.svelte
    │   │   │   ├── PipelineOptions.svelte
    │   │   │   ├── SeedInput.svelte
    │   │   │   ├── Selectlist.svelte
    │   │   │   ├── TextArea.svelte
    │   │   │   ├── VideoInput.svelte
    │   │   │   └── Warning.svelte
    │   │   ├── icons
    │   │   │   ├── aspect.svelte
    │   │   │   ├── expand.svelte
    │   │   │   ├── floppy.svelte
    │   │   │   ├── screen.svelte
    │   │   │   └── spinner.svelte
    │   │   ├── index.ts
    │   │   ├── lcmLive.ts
    │   │   ├── mediaStream.ts
    │   │   ├── store.ts
    │   │   ├── types.ts
    │   │   └── utils.ts
    │   ├── piexifjs.d.ts
    │   └── routes
    │   │   ├── +layout.svelte
    │   │   ├── +page.svelte
    │   │   └── +page.ts
    ├── static
    │   └── favicon.png
    ├── svelte.config.js
    ├── tailwind.config.js
    ├── tsconfig.json
    └── vite.config.ts
├── qr-code.png
└── server
    ├── config.py
    ├── connection_manager.py
    ├── device.py
    ├── main.py
    ├── pipelines
        ├── IPcompositionHyperSD15.py
        ├── IPcompositionHyperSDXL.py
        ├── __init__.py
        ├── controlnet.py
        ├── controlnetDepthFlashSD.py
        ├── controlnetDepthHyperSD.py
        ├── controlnetDepthHyperSDXL.py
        ├── controlnetFlashSD.py
        ├── controlnetFlashSDXL.py
        ├── controlnetHyperSD.py
        ├── controlnetHyperSDXL.py
        ├── controlnetLoraSD15.py
        ├── controlnetLoraSD15QRCode.py
        ├── controlnetLoraSDXL-Lightning.py
        ├── controlnetLoraSDXL.py
        ├── controlnetMistoLineHyperSDXL.py
        ├── controlnetPCMSD15.py
        ├── controlnetSDTurbo.py
        ├── controlnetSDXLTurbo.py
        ├── controlnetSegmindVegaRT.py
        ├── img2img.py
        ├── img2imgFlux.py
        ├── img2imgSDTurbo.py
        ├── img2imgSDXL-Lightning.py
        ├── img2imgSDXLTurbo.py
        ├── img2imgSDXS512.py
        ├── img2imgSegmindVegaRT.py
        ├── pix2pix
        │   ├── __init__.py
        │   ├── model.py
        │   └── pix2pix_turbo.py
        ├── pix2pixTurbo.py
        ├── txt2img.py
        ├── txt2imgLora.py
        ├── txt2imgLoraSDXL.py
        └── utils
        │   ├── canny_gpu.py
        │   └── safety_checker.py
    ├── requirements.txt
    └── util.py


/.gitattributes:
--------------------------------------------------------------------------------
 1 | *.7z filter=lfs diff=lfs merge=lfs -text
 2 | *.arrow filter=lfs diff=lfs merge=lfs -text
 3 | *.bin filter=lfs diff=lfs merge=lfs -text
 4 | *.bz2 filter=lfs diff=lfs merge=lfs -text
 5 | *.ckpt filter=lfs diff=lfs merge=lfs -text
 6 | *.ftz filter=lfs diff=lfs merge=lfs -text
 7 | *.gz filter=lfs diff=lfs merge=lfs -text
 8 | *.h5 filter=lfs diff=lfs merge=lfs -text
 9 | *.joblib filter=lfs diff=lfs merge=lfs -text
10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text
11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text
12 | *.model filter=lfs diff=lfs merge=lfs -text
13 | *.msgpack filter=lfs diff=lfs merge=lfs -text
14 | *.npy filter=lfs diff=lfs merge=lfs -text
15 | *.npz filter=lfs diff=lfs merge=lfs -text
16 | *.onnx filter=lfs diff=lfs merge=lfs -text
17 | *.ot filter=lfs diff=lfs merge=lfs -text
18 | *.parquet filter=lfs diff=lfs merge=lfs -text
19 | *.pb filter=lfs diff=lfs merge=lfs -text
20 | *.pickle filter=lfs diff=lfs merge=lfs -text
21 | *.pkl filter=lfs diff=lfs merge=lfs -text
22 | *.pt filter=lfs diff=lfs merge=lfs -text
23 | *.pth filter=lfs diff=lfs merge=lfs -text
24 | *.rar filter=lfs diff=lfs merge=lfs -text
25 | *.safetensors filter=lfs diff=lfs merge=lfs -text
26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27 | *.tar.* filter=lfs diff=lfs merge=lfs -text
28 | *.tar filter=lfs diff=lfs merge=lfs -text
29 | *.tflite filter=lfs diff=lfs merge=lfs -text
30 | *.tgz filter=lfs diff=lfs merge=lfs -text
31 | *.wasm filter=lfs diff=lfs merge=lfs -text
32 | *.xz filter=lfs diff=lfs merge=lfs -text
33 | *.zip filter=lfs diff=lfs merge=lfs -text
34 | *.zst filter=lfs diff=lfs merge=lfs -text
35 | *tfevents* filter=lfs diff=lfs merge=lfs -text
36 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | venv/
3 | public/
4 | *.pem


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
 2 | 
 3 | ARG DEBIAN_FRONTEND=noninteractive
 4 | 
 5 | ENV PYTHONUNBUFFERED=1
 6 | ENV NODE_MAJOR=20
 7 | 
 8 | RUN apt-get update && apt-get install --no-install-recommends -y \
 9 |     build-essential \
10 |     python3.9 \
11 |     python3-pip \
12 |     python3-dev \
13 |     git \
14 |     ffmpeg \
15 |     google-perftools \
16 |     ca-certificates curl gnupg \
17 |     && apt-get clean && rm -rf /var/lib/apt/lists/*
18 | 
19 | WORKDIR /code
20 | 
21 | RUN mkdir -p /etc/apt/keyrings 
22 | RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
23 | 
24 | RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_MAJOR}.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list > /dev/null
25 | RUN apt-get update && apt-get install nodejs -y
26 | 
27 | COPY ./server/requirements.txt /code/requirements.txt
28 | 
29 | # Download and install UV
30 | ADD https://astral.sh/uv/install.sh /uv-installer.sh
31 | RUN chmod +x /uv-installer.sh && \
32 |     /uv-installer.sh && \
33 |     rm /uv-installer.sh
34 | 
35 | ENV PATH="/root/.local/bin:$PATH"
36 | 
37 | # Set up a new user named "user" with user ID 1000
38 | RUN useradd -m -u 1000 user
39 | 
40 | # Install dependencies using UV as root
41 | RUN uv pip install --no-cache --system --index-strategy=unsafe-best-match -r /code/requirements.txt 
42 | 
43 | # Switch to the "user" user
44 | USER user
45 | 
46 | # Set home to the user's home directory
47 | ENV HOME=/home/user \
48 |     PATH=/home/user/.local/bin:/root/.local/bin:$PATH \
49 |     PYTHONPATH=$HOME/app \
50 |     PYTHONUNBUFFERED=1 \
51 |     SYSTEM=spaces
52 | 
53 | # Set the working directory to the user's home directory
54 | WORKDIR $HOME/app
55 | 
56 | # Copy the current directory contents into the container at $HOME/app setting the owner to the user
57 | COPY --chown=user . $HOME/app
58 | 
59 | ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
60 | CMD ["./build-run.sh"]


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Real-Time Latent Consistency Model Image-to-Image ControlNet
  3 | emoji: 🖼️🖼️
  4 | colorFrom: gray
  5 | colorTo: indigo
  6 | sdk: docker
  7 | pinned: false
  8 | suggested_hardware: a10g-small
  9 | disable_embedding: true
 10 | ---
 11 | 
 12 | # Real-Time Latent Consistency Model
 13 | 
 14 | This demo showcases [Latent Consistency Model (LCM)](https://latent-consistency-models.github.io/) using [Diffusers](https://huggingface.co/docs/diffusers/using-diffusers/lcm) with a MJPEG stream server. You can read more about LCM + LoRAs with diffusers [here](https://huggingface.co/blog/lcm_lora).
 15 | 
 16 | You need a webcam to run this demo. 🤗
 17 | 
 18 | See a collecting with live demos [here](https://huggingface.co/collections/latent-consistency/latent-consistency-model-demos-654e90c52adb0688a0acbe6f)
 19 | 
 20 | ## Running Locally
 21 | 
 22 | You need CUDA and Python 3.10, Node > 19, Mac with an M1/M2/M3 chip or Intel Arc GPU
 23 | 
 24 | 
 25 | ## Install
 26 | 
 27 | ```bash
 28 | uv venv --python=3.10  
 29 | source .venv/bin/activate
 30 | uv pip install -r server/requirements.txt
 31 | cd frontend && npm install && npm run build && cd ..
 32 | python server/main.py --reload --pipeline img2imgSDTurbo 
 33 |  ```
 34 | 
 35 | Don't forget to fuild the frontend!!! 
 36 | 
 37 | ```bash
 38 | cd frontend && npm install && npm run build && cd ..
 39 | ```
 40 | 
 41 | # Pipelines
 42 | You can build your own pipeline following examples here [here](pipelines),
 43 | 
 44 | 
 45 | # LCM
 46 | ### Image to Image
 47 | 
 48 | ```bash
 49 | python server/main.py --reload --pipeline img2img 
 50 | ```
 51 | 
 52 | # LCM
 53 | ### Text to Image
 54 | 
 55 | ```bash
 56 | python server/main.py --reload --pipeline txt2img 
 57 | ```
 58 | 
 59 | ### Image to Image ControlNet Canny
 60 | 
 61 | ```bash
 62 | python server/main.py --reload --pipeline controlnet 
 63 | ```
 64 | 
 65 | 
 66 | # LCM + LoRa
 67 | 
 68 | Using LCM-LoRA, giving it the super power of doing inference in as little as 4 steps. [Learn more here](https://huggingface.co/blog/lcm_lora) or [technical report](https://huggingface.co/papers/2311.05556)
 69 | 
 70 | 
 71 | ### Image to Image ControlNet Canny LoRa
 72 | 
 73 | ```bash
 74 | python server/main.py --reload --pipeline controlnetLoraSD15
 75 | ```
 76 | or SDXL, note that SDXL is slower than SD15 since the inference runs on 1024x1024 images
 77 | 
 78 | ```bash
 79 | python server/main.py --reload --pipeline controlnetLoraSDXL
 80 | ```
 81 | 
 82 | ### Text to Image
 83 | 
 84 | ```bash
 85 | python server/main.py --reload --pipeline txt2imgLora
 86 | ```
 87 | 
 88 | ```bash
 89 | python server/main.py --reload --pipeline txt2imgLoraSDXL
 90 | ```
 91 | # Available Pipelines
 92 | 
 93 | #### [LCM](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7)
 94 | 
 95 | `img2img`  
 96 | `txt2img`   
 97 | `controlnet`   
 98 | `txt2imgLora`   
 99 | `controlnetLoraSD15` 
100 | 
101 | #### [SD15](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
102 | `controlnetLoraSDXL`   
103 | `txt2imgLoraSDXL`   
104 | 
105 | #### [SDXL Turbo](https://huggingface.co/stabilityai/sd-xl-turbo)
106 | 
107 | `img2imgSDXLTurbo`    
108 | `controlnetSDXLTurbo`   
109 | 
110 | 
111 | #### [SDTurbo](https://huggingface.co/stabilityai/sd-turbo)
112 | `img2imgSDTurbo`   
113 | `controlnetSDTurbo`   
114 | 
115 | #### [Segmind-Vega](https://huggingface.co/segmind/Segmind-Vega)
116 | `controlnetSegmindVegaRT`   
117 | `img2imgSegmindVegaRT`   
118 | 
119 | 
120 | ### Setting environment variables
121 | 
122 | 
123 | * `--host`: Host address (default: 0.0.0.0)  
124 | * `--port`: Port number (default: 7860)  
125 | * `--reload`: Reload code on change  
126 | * `--max-queue-size`: Maximum queue size (optional)
127 | * `--timeout`: Timeout period (optional)
128 | * `--safety-checker`: Enable Safety Checker (optional) 
129 | * `--torch-compile`: Use Torch Compile
130 | * `--use-taesd` / `--no-taesd`: Use Tiny Autoencoder  
131 | * `--pipeline`: Pipeline to use (default: "txt2img")  
132 | * `--ssl-certfile`: SSL Certificate File (optional)
133 | * `--ssl-keyfile`: SSL Key File (optional)
134 | * `--debug`: Print Inference time  
135 | * `--compel`: Compel option  
136 | * `--sfast`: Enable Stable Fast   
137 | * `--onediff`: Enable OneDiff
138 | 
139 | If you run using `bash build-run.sh` you can set `PIPELINE` variables to choose the pipeline you want to run
140 | 
141 | ```bash
142 | PIPELINE=txt2imgLoraSDXL bash build-run.sh
143 | ```
144 | 
145 | and setting environment variables
146 | 
147 | ```bash
148 | TIMEOUT=120 SAFETY_CHECKER=True MAX_QUEUE_SIZE=4 python server/main.py --reload --pipeline txt2imgLoraSDXL
149 | ```
150 | 
151 | If you're running locally and want to test it on Mobile Safari, the webserver needs to be served over HTTPS, or follow this instruction on my [comment](https://github.com/radames/Real-Time-Latent-Consistency-Model/issues/17#issuecomment-1811957196)
152 | 
153 | ```bash
154 | openssl req -newkey rsa:4096 -nodes -keyout key.pem -x509 -days 365 -out certificate.pem
155 | python server/main.py --reload --ssl-certfile=certificate.pem --ssl-keyfile=key.pem
156 | ```
157 | 
158 | ## Docker
159 | 
160 | You need NVIDIA Container Toolkit for Docker, defaults to `controlnet``
161 | 
162 | ```bash
163 | docker build -t lcm-live .
164 | docker run -ti -p 7860:7860 --gpus all lcm-live
165 | ```
166 | 
167 | reuse models data from host to avoid downloading them again, you can change `~/.cache/huggingface` to any other directory, but if you use hugingface-cli locally, you can share the same cache
168 | 
169 | ```bash
170 | docker run -ti -p 7860:7860 -e HF_HOME=/data -v ~/.cache/huggingface:/data  --gpus all lcm-live
171 | ```
172 |  
173 | 
174 | or with environment variables
175 | 
176 | ```bash
177 | docker run -ti -e PIPELINE=txt2imgLoraSDXL -p 7860:7860 --gpus all lcm-live
178 | ```
179 | 
180 | 
181 | # Demo on Hugging Face
182 | 
183 | 
184 | * [radames/Real-Time-Latent-Consistency-Model](https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model)  
185 | * [radames/Real-Time-SD-Turbo](https://huggingface.co/spaces/radames/Real-Time-SD-Turbo)  
186 | * [latent-consistency/Real-Time-LCM-ControlNet-Lora-SD1.5](https://huggingface.co/spaces/latent-consistency/Real-Time-LCM-ControlNet-Lora-SD1.5)  
187 | * [latent-consistency/Real-Time-LCM-Text-to-Image-Lora-SD1.5](https://huggingface.co/spaces/latent-consistency/Real-Time-LCM-Text-to-Image-Lora-SD1.5)  
188 | * [radames/Real-Time-Latent-Consistency-Model-Text-To-Image](https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model-Text-To-Image)  
189 | 
190 | 
191 | 
192 | 
193 | https://github.com/radames/Real-Time-Latent-Consistency-Model/assets/102277/c4003ac5-e7ff-44c0-97d3-464bb659de70
194 | 


--------------------------------------------------------------------------------
/build-run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | cd frontend
 3 | npm install
 4 | npm run build
 5 | if [ $? -eq 0 ]; then
 6 |     echo -e "\033[1;32m\nfrontend build success \033[0m"
 7 | else
 8 |     echo -e "\033[1;31m\nfrontend build failed\n\033[0m" >&2  exit 1
 9 | fi
10 | cd ../
11 | #check if var PIPELINE is set otherwise get default
12 | if [ -z ${PIPELINE+x} ]; then
13 |     PIPELINE="controlnet"
14 | fi
15 | if [ -z ${COMPILE+x} ]; then
16 |     COMPILE="--sfast"
17 | fi
18 | echo -e "\033[1;32m\npipeline: $PIPELINE \033[0m"
19 | echo -e "\033[1;32m\ncompile: $COMPILE \033[0m"
20 | python3 ./server/main.py --port 7860 --host 0.0.0.0 --pipeline $PIPELINE $COMPILE


--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
 1 | node_modules
 2 | 
 3 | # Output
 4 | .output
 5 | .vercel
 6 | .netlify
 7 | .wrangler
 8 | /.svelte-kit
 9 | /build
10 | 
11 | # OS
12 | .DS_Store
13 | Thumbs.db
14 | 
15 | # Env
16 | .env
17 | .env.*
18 | !.env.example
19 | !.env.test
20 | 
21 | # Vite
22 | vite.config.js.timestamp-*
23 | vite.config.ts.timestamp-*
24 | public/
25 | 


--------------------------------------------------------------------------------
/frontend/.npmrc:
--------------------------------------------------------------------------------
1 | engine-strict=true
2 | 


--------------------------------------------------------------------------------
/frontend/.nvmrc:
--------------------------------------------------------------------------------
1 | v20.14.0
2 | 


--------------------------------------------------------------------------------
/frontend/.prettierignore:
--------------------------------------------------------------------------------
1 | # Package Managers
2 | package-lock.json
3 | pnpm-lock.yaml
4 | yarn.lock
5 | bun.lock
6 | bun.lockb


--------------------------------------------------------------------------------
/frontend/.prettierrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "plugins": ["prettier-plugin-svelte", "prettier-plugin-tailwindcss"],
 3 |   "overrides": [
 4 |     {
 5 |       "files": "*.svelte",
 6 |       "options": {
 7 |         "parser": "svelte"
 8 |       }
 9 |     }
10 |   ]
11 | }
12 | 


--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
 1 | # sv
 2 | 
 3 | Everything you need to build a Svelte project, powered by [`sv`](https://github.com/sveltejs/cli).
 4 | 
 5 | ## Creating a project
 6 | 
 7 | If you're seeing this, you've probably already done this step. Congrats!
 8 | 
 9 | ```bash
10 | # create a new project in the current directory
11 | npx sv create
12 | 
13 | # create a new project in my-app
14 | npx sv create my-app
15 | ```
16 | 
17 | ## Developing
18 | 
19 | Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:
20 | 
21 | ```bash
22 | npm run dev
23 | 
24 | # or start the server and open the app in a new browser tab
25 | npm run dev -- --open
26 | ```
27 | 
28 | ## Building
29 | 
30 | To create a production version of your app:
31 | 
32 | ```bash
33 | npm run build
34 | ```
35 | 
36 | You can preview the production build with `npm run preview`.
37 | 
38 | > To deploy your app, you may need to install an [adapter](https://svelte.dev/docs/kit/adapters) for your target environment.
39 | 


--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "frontend",
 3 |   "private": true,
 4 |   "version": "0.0.1",
 5 |   "type": "module",
 6 |   "scripts": {
 7 |     "dev": "vite dev",
 8 |     "build": "vite build",
 9 |     "preview": "vite preview",
10 |     "prepare": "svelte-kit sync || echo ''",
11 |     "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
12 |     "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
13 |     "lint": "eslint . && prettier --check .",
14 |     "format": "prettier --write ."
15 |   },
16 |   "devDependencies": {
17 |     "@eslint/compat": "^1.2.5",
18 |     "@eslint/js": "^9.26.0",
19 |     "@sveltejs/adapter-static": "^3.0.8",
20 |     "@sveltejs/kit": "^2.16.0",
21 |     "@sveltejs/vite-plugin-svelte": "^5.0.0",
22 |     "@tailwindcss/typography": "^0.5.15",
23 |     "@tailwindcss/vite": "^4.1.5",
24 |     "eslint": "^9.26.0",
25 |     "eslint-config-prettier": "^10.0.1",
26 |     "eslint-plugin-svelte": "^3.0.0",
27 |     "globals": "^16.0.0",
28 |     "prettier": "^3.4.2",
29 |     "prettier-plugin-svelte": "^3.3.3",
30 |     "prettier-plugin-tailwindcss": "^0.6.11",
31 |     "svelte": "^5.0.0",
32 |     "svelte-check": "^4.0.0",
33 |     "tailwindcss": "^4.1.5",
34 |     "typescript": "^5.0.0",
35 |     "typescript-eslint": "^8.20.0",
36 |     "vite": "^6.2.6"
37 |   },
38 |   "dependencies": {
39 |     "piexifjs": "^1.0.6",
40 |     "rvfc-polyfill": "^1.0.7"
41 |   }
42 | }
43 | 


--------------------------------------------------------------------------------
/frontend/src/app.css:
--------------------------------------------------------------------------------
1 | @import "tailwindcss";
2 | @plugin '@tailwindcss/typography';
3 | 


--------------------------------------------------------------------------------
/frontend/src/app.d.ts:
--------------------------------------------------------------------------------
 1 | // See https://svelte.dev/docs/kit/types#app.d.ts
 2 | // for information about these interfaces
 3 | declare global {
 4 | 	namespace App {
 5 | 		// interface Error {}
 6 | 		// interface Locals {}
 7 | 		// interface PageData {}
 8 | 		// interface PageState {}
 9 | 		// interface Platform {}
10 | 	}
11 | }
12 | 
13 | export {};
14 | 


--------------------------------------------------------------------------------
/frontend/src/app.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 | 	<head>
 4 | 		<meta charset="utf-8" />
 5 | 		<link rel="icon" href="%sveltekit.assets%/favicon.png" />
 6 | 		<meta name="viewport" content="width=device-width, initial-scale=1" />
 7 | 		%sveltekit.head%
 8 | 	</head>
 9 | 	<body data-sveltekit-preload-data="hover">
10 | 		<div style="display: contents">%sveltekit.body%</div>
11 | 	</body>
12 | </html>
13 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/AspectRatioSelect.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   let {
 3 |     change,
 4 |     aspectRatio = $bindable(),
 5 |   }: { change: (aspectRatio: number) => void; aspectRatio: number } = $props();
 6 | 
 7 |   let options: string[] = ["1:1", "16:9", "4:3", "3:2", "3:4", "9:16"];
 8 | 
 9 |   function onChange(e: Event) {
10 |     const target = e.target as HTMLSelectElement;
11 |     const value = target.value;
12 |     const [width, height] = value.split(":").map((v) => parseInt(v));
13 |     aspectRatio = width / height;
14 |     change(aspectRatio);
15 |   }
16 | </script>
17 | 
18 | <div class="relative">
19 |   <select
20 |     onchange={onChange}
21 |     title="Aspect Ratio"
22 |     class="block cursor-pointer rounded-md border border-gray-800/50 bg-slate-100/30 p-1 font-medium text-white"
23 |   >
24 |     {#each options as option (option)}
25 |       <option value={option}>{option}</option>
26 |     {/each}
27 |   </select>
28 | </div>
29 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/Button.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   let props = $props();
 3 | </script>
 4 | 
 5 | <button {...props}>
 6 |   {@render props.children()}
 7 | </button>
 8 | 
 9 | <style lang="postcss">
10 |   @reference "tailwindcss";
11 |   button {
12 |     @apply cursor-pointer rounded bg-gray-700 font-normal text-white hover:bg-gray-800 disabled:cursor-not-allowed disabled:bg-gray-300 dark:disabled:bg-gray-700 dark:disabled:text-black;
13 |   }
14 | </style>
15 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/Checkbox.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   import type { FieldProps } from "$lib/types";
 3 |   import { onMount } from "svelte";
 4 |   let { value = $bindable(), params }: { value: boolean; params: FieldProps } =
 5 |     $props();
 6 | 
 7 |   onMount(() => {
 8 |     value = params?.default ? true : false;
 9 |   });
10 | </script>
11 | 
12 | <div class="grid max-w-md grid-cols-4 items-center justify-items-start gap-3">
13 |   <label class="text-sm font-medium" for={params.id}>{params?.title}</label>
14 |   <input
15 |     bind:checked={value}
16 |     type="checkbox"
17 |     id={params.id}
18 |     class="cursor-pointer"
19 |   />
20 | </div>
21 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/ImagePlayer.svelte:
--------------------------------------------------------------------------------
  1 | <script lang="ts">
  2 |   import { lcmLiveStatus, LCMLiveStatus, streamId } from "$lib/lcmLive";
  3 |   import { getPipelineValues } from "$lib/store";
  4 | 
  5 |   import Button from "$lib/components/Button.svelte";
  6 |   import Floppy from "$lib/icons/floppy.svelte";
  7 |   import Expand from "$lib/icons/expand.svelte";
  8 |   import { snapImage, expandWindow } from "$lib/utils";
  9 | 
 10 |   let isLCMRunning = $derived(
 11 |     $lcmLiveStatus !== LCMLiveStatus.DISCONNECTED &&
 12 |       $lcmLiveStatus !== LCMLiveStatus.ERROR,
 13 |   );
 14 | 
 15 |   let imageEl: HTMLImageElement | undefined = $state();
 16 |   let expandedWindow: Window | undefined = $state();
 17 |   let isExpanded = $state(false);
 18 | 
 19 |   async function takeSnapshot() {
 20 |     if (isLCMRunning && imageEl) {
 21 |       await snapImage(imageEl, {
 22 |         prompt: getPipelineValues()?.prompt as string,
 23 |         negative_prompt: getPipelineValues()?.negative_prompt as string,
 24 |         seed: getPipelineValues()?.seed as number,
 25 |         guidance_scale: getPipelineValues()?.guidance_scale as number,
 26 |       });
 27 |     }
 28 |   }
 29 |   async function toggleFullscreen() {
 30 |     if (isLCMRunning && !isExpanded) {
 31 |       expandedWindow = expandWindow("/api/stream/" + $streamId);
 32 |       expandedWindow.addEventListener("beforeunload", () => {
 33 |         isExpanded = false;
 34 |       });
 35 |       isExpanded = true;
 36 |     } else {
 37 |       expandedWindow?.close();
 38 |       isExpanded = false;
 39 |     }
 40 |   }
 41 | </script>
 42 | 
 43 | <div
 44 |   class="relative mx-auto aspect-square max-w-lg self-center overflow-hidden rounded-lg border border-slate-300"
 45 | >
 46 |   {#if $lcmLiveStatus === LCMLiveStatus.CONNECTING}
 47 |     <!-- Show connecting spinner -->
 48 |     <div class="flex h-full w-full items-center justify-center">
 49 |       <div
 50 |         class="h-16 w-16 animate-spin rounded-full border-b-2 border-white"
 51 |       ></div>
 52 |       <p class="ml-2 text-white">Connecting...</p>
 53 |     </div>
 54 |   {:else if isLCMRunning}
 55 |     {#if !isExpanded}
 56 |       <!-- Handle image error by adding onerror event -->
 57 |       <!-- svelte-ignore a11y_missing_attribute -->
 58 |       <img
 59 |         bind:this={imageEl}
 60 |         class="aspect-square w-full rounded-lg"
 61 |         src={"/api/stream/" + $streamId}
 62 |         onerror={(e) => {
 63 |           console.error("Image stream error:", e);
 64 |           // If stream fails to load, set status to error
 65 |           if ($lcmLiveStatus !== LCMLiveStatus.ERROR) {
 66 |             lcmLiveStatus.set(LCMLiveStatus.ERROR);
 67 |           }
 68 |         }}
 69 |       />
 70 |     {/if}
 71 |     <div class="absolute bottom-1 right-1">
 72 |       <Button
 73 |         onclick={toggleFullscreen}
 74 |         title="Expand Fullscreen"
 75 |         class="ml-auto rounded-lg p-1 text-sm text-white opacity-50 shadow-lg"
 76 |       >
 77 |         <Expand />
 78 |       </Button>
 79 |       <Button
 80 |         onclick={takeSnapshot}
 81 |         disabled={!isLCMRunning}
 82 |         title="Take Snapshot"
 83 |         class="ml-auto rounded-lg p-1 text-sm text-white opacity-50 shadow-lg"
 84 |       >
 85 |         <Floppy />
 86 |       </Button>
 87 |     </div>
 88 |   {:else if $lcmLiveStatus === LCMLiveStatus.ERROR}
 89 |     <!-- Show error state with red border -->
 90 |     <div
 91 |       class="flex h-full w-full items-center justify-center rounded-lg border-2 border-red-500 bg-gray-900"
 92 |     >
 93 |       <p class="p-4 text-center text-white">Connection error</p>
 94 |     </div>
 95 |   {:else}
 96 |     <!-- svelte-ignore a11y_missing_attribute -->
 97 |     <img
 98 |       class="aspect-square w-full rounded-lg"
 99 |       src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="
100 |     />
101 |   {/if}
102 | </div>
103 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/InputRange.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   import type { FieldProps } from "$lib/types";
 3 |   import { onMount } from "svelte";
 4 |   let { value = $bindable(), params }: { value: number; params: FieldProps } =
 5 |     $props();
 6 | 
 7 |   onMount(() => {
 8 |     value = Number(params?.default ?? "");
 9 |   });
10 | </script>
11 | 
12 | <div class="grid max-w-md grid-cols-4 items-center gap-3">
13 |   <label class="text-sm font-medium" for={params.id}>{params?.title}</label>
14 |   <input
15 |     class="col-span-2 h-2 w-full cursor-pointer appearance-none rounded-lg bg-gray-300 dark:bg-gray-500"
16 |     bind:value
17 |     type="range"
18 |     id={params.id}
19 |     name={params.id}
20 |     min={params?.min}
21 |     max={params?.max}
22 |     step={params?.step ?? 1}
23 |   />
24 |   <input
25 |     type="number"
26 |     step={params?.step ?? 1}
27 |     bind:value
28 |     class="rounded-md border px-1 py-1 text-center text-xs font-bold dark:text-black"
29 |   />
30 | </div>
31 | <!-- 
32 | <style lang="postcss" scoped>
33 |   input[type='range']::-webkit-slider-runnable-track {
34 |     @apply h-2 cursor-pointer rounded-lg dark:bg-gray-50;
35 |   }
36 |   input[type='range']::-webkit-slider-thumb {
37 |     @apply cursor-pointer rounded-lg dark:bg-gray-50;
38 |   }
39 |   input[type='range']::-moz-range-track {
40 |     @apply cursor-pointer rounded-lg dark:bg-gray-50;
41 |   }
42 |   input[type='range']::-moz-range-thumb {
43 |     @apply cursor-pointer rounded-lg dark:bg-gray-50;
44 |   }
45 |   input[type='range']::-ms-track {
46 |     @apply cursor-pointer rounded-lg dark:bg-gray-50;
47 |   }
48 |   input[type='range']::-ms-thumb {
49 |     @apply cursor-pointer rounded-lg dark:bg-gray-50;
50 |   }
51 | </style> -->
52 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/MediaListSwitcher.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   import { mediaDevices, mediaStreamActions } from "$lib/mediaStream";
 3 |   import Screen from "$lib/icons/screen.svelte";
 4 |   import AspectRatioSelect from "./AspectRatioSelect.svelte";
 5 |   import { onMount } from "svelte";
 6 | 
 7 |   let deviceId: string = "";
 8 |   let aspectRatio: number = 1;
 9 | 
10 |   onMount(() => {
11 |     deviceId = $mediaDevices[0].deviceId;
12 |   });
13 | </script>
14 | 
15 | <div
16 |   class="flex items-center justify-center text-xs backdrop-blur-sm backdrop-grayscale"
17 | >
18 |   <AspectRatioSelect
19 |     bind:aspectRatio
20 |     change={(value) => mediaStreamActions.switchCamera(deviceId, value)}
21 |   />
22 |   <button
23 |     title="Share your screen"
24 |     class="my-1 flex cursor-pointer gap-1 rounded-md border border-gray-500/50 bg-slate-100/30 p-1 font-medium text-white"
25 |     onclick={() => mediaStreamActions.startScreenCapture()}
26 |   >
27 |     <span>Share</span>
28 | 
29 |     <Screen />
30 |   </button>
31 |   {#if $mediaDevices}
32 |     <select
33 |       bind:value={deviceId}
34 |       onchange={() => mediaStreamActions.switchCamera(deviceId, aspectRatio)}
35 |       id="devices-list"
36 |       class="block cursor-pointer rounded-md border border-gray-800/50 bg-slate-100/30 p-1 font-medium text-white"
37 |     >
38 |       {#each $mediaDevices as device (device.deviceId)}
39 |         <option value={device.deviceId}>{device.label}</option>
40 |       {/each}
41 |     </select>
42 |   {/if}
43 | </div>
44 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/PipelineOptions.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   import type { Fields } from "$lib/types";
 3 |   import { FieldType } from "$lib/types";
 4 |   import InputRange from "./InputRange.svelte";
 5 |   import SeedInput from "./SeedInput.svelte";
 6 |   import TextArea from "./TextArea.svelte";
 7 |   import Checkbox from "./Checkbox.svelte";
 8 |   import Selectlist from "./Selectlist.svelte";
 9 |   import { pipelineValues } from "$lib/store";
10 | 
11 |   let { pipelineParams = $bindable() }: { pipelineParams: Fields } = $props();
12 | 
13 |   let advanceOptions = $derived(
14 |     Object.values(pipelineParams)?.filter((e) => e?.hide == true),
15 |   );
16 |   let featuredOptions = $derived(
17 |     Object.values(pipelineParams)?.filter((e) => e?.hide !== true),
18 |   );
19 | </script>
20 | 
21 | <div class="flex flex-col gap-3">
22 |   <div class="grid grid-cols-1 items-center gap-3">
23 |     {#if featuredOptions}
24 |       {#each featuredOptions as params (params.id)}
25 |         {#if params.field === FieldType.RANGE}
26 |           <InputRange {params} bind:value={$pipelineValues[params.id] as number}
27 |           ></InputRange>
28 |         {:else if params.field === FieldType.SEED}
29 |           <SeedInput {params} bind:value={$pipelineValues[params.id] as number}
30 |           ></SeedInput>
31 |         {:else if params.field === FieldType.TEXTAREA}
32 |           <TextArea {params} bind:value={$pipelineValues[params.id] as string}
33 |           ></TextArea>
34 |         {:else if params.field === FieldType.CHECKBOX}
35 |           <Checkbox {params} bind:value={$pipelineValues[params.id] as boolean}
36 |           ></Checkbox>
37 |         {:else if params.field === FieldType.SELECT}
38 |           <Selectlist {params} bind:value={$pipelineValues[params.id] as string}
39 |           ></Selectlist>
40 |         {/if}
41 |       {/each}
42 |     {/if}
43 |   </div>
44 | 
45 |   <details>
46 |     <summary class="cursor-pointer font-medium">Advanced Options</summary>
47 |     <div
48 |       class="grid grid-cols-1 items-center gap-3 {Object.values(pipelineParams)
49 |         .length > 5
50 |         ? 'sm:grid-cols-2'
51 |         : ''}"
52 |     >
53 |       {#if advanceOptions}
54 |         {#each advanceOptions as params (params.id)}
55 |           {#if params.field === FieldType.RANGE}
56 |             <InputRange
57 |               {params}
58 |               bind:value={$pipelineValues[params.id] as number}
59 |             ></InputRange>
60 |           {:else if params.field === FieldType.SEED}
61 |             <SeedInput
62 |               {params}
63 |               bind:value={$pipelineValues[params.id] as number}
64 |             ></SeedInput>
65 |           {:else if params.field === FieldType.TEXTAREA}
66 |             <TextArea {params} bind:value={$pipelineValues[params.id] as string}
67 |             ></TextArea>
68 |           {:else if params.field === FieldType.CHECKBOX}
69 |             <Checkbox
70 |               {params}
71 |               bind:value={$pipelineValues[params.id] as boolean}
72 |             ></Checkbox>
73 |           {:else if params.field === FieldType.SELECT}
74 |             <Selectlist
75 |               {params}
76 |               bind:value={$pipelineValues[params.id] as string}
77 |             ></Selectlist>
78 |           {/if}
79 |         {/each}
80 |       {/if}
81 |     </div>
82 |   </details>
83 | </div>
84 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/SeedInput.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   import Button from "./Button.svelte";
 3 |   import { onMount } from "svelte";
 4 |   import type { FieldProps } from "$lib/types";
 5 |   let { value = $bindable(), params }: { value: number; params: FieldProps } =
 6 |     $props();
 7 | 
 8 |   function randomize() {
 9 |     value = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER);
10 |   }
11 |   onMount(() => {
12 |     value = Number(params?.default ?? "");
13 |   });
14 | </script>
15 | 
16 | <div class="grid max-w-md grid-cols-4 items-center gap-3">
17 |   <label class="text-sm font-medium" for="seed">Seed</label>
18 |   <input
19 |     bind:value
20 |     type="number"
21 |     id="seed"
22 |     name="seed"
23 |     class="col-span-2 rounded-md border border-gray-700 p-2 text-right font-light dark:text-black"
24 |   />
25 |   <Button onclick={randomize}>Rand</Button>
26 | </div>
27 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/Selectlist.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   import { onMount } from "svelte";
 3 |   import type { FieldProps } from "$lib/types";
 4 |   let { value = $bindable(""), params }: { value: string; params: FieldProps } =
 5 |     $props();
 6 | 
 7 |   onMount(() => {
 8 |     value = String(params?.default);
 9 |   });
10 | </script>
11 | 
12 | <div class="grid max-w-md grid-cols-4 items-center justify-items-start gap-3">
13 |   <label for="model-list" class="text-sm font-medium">{params?.title} </label>
14 |   {#if params?.values}
15 |     <select
16 |       bind:value
17 |       id="model-list"
18 |       class="cursor-pointer rounded-md border-2 border-gray-500 p-2 font-light dark:text-black"
19 |     >
20 |       {#each params.values as model, i (model)}
21 |         <option value={model} selected={i === 0}>{model}</option>
22 |       {/each}
23 |     </select>
24 |   {/if}
25 | </div>
26 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/TextArea.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   import { onMount } from "svelte";
 3 |   import type { FieldProps } from "$lib/types";
 4 |   let { value = $bindable(), params }: { value: string; params: FieldProps } =
 5 |     $props();
 6 | 
 7 |   onMount(() => {
 8 |     value = String(params?.default ?? "");
 9 |   });
10 | </script>
11 | 
12 | <div class="">
13 |   <label class="text-sm font-medium" for={params?.title}>
14 |     {params?.title}
15 |   </label>
16 |   <div class="text-normal flex items-center rounded-md border border-gray-700">
17 |     <textarea
18 |       class="mx-1 w-full px-3 py-2 font-light outline-none dark:text-black"
19 |       title={params?.title}
20 |       placeholder="Add your prompt here..."
21 |       bind:value
22 |     ></textarea>
23 |   </div>
24 | </div>
25 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/VideoInput.svelte:
--------------------------------------------------------------------------------
  1 | <script lang="ts">
  2 |   import "rvfc-polyfill";
  3 | 
  4 |   import { onDestroy, onMount } from "svelte";
  5 |   import {
  6 |     mediaStreamStatus,
  7 |     MediaStreamStatusEnum,
  8 |     onFrameChangeStore,
  9 |     mediaStream,
 10 |     mediaDevices,
 11 |   } from "$lib/mediaStream";
 12 |   import MediaListSwitcher from "./MediaListSwitcher.svelte";
 13 |   import Button from "$lib/components/Button.svelte";
 14 |   import Expand from "$lib/icons/expand.svelte";
 15 | 
 16 |   let { width = 512, height = 512 }: { width: number; height: number } =
 17 |     $props();
 18 | 
 19 |   const size = { width, height };
 20 | 
 21 |   let videoEl: HTMLVideoElement | undefined = $state();
 22 |   let canvasEl: HTMLCanvasElement | undefined = $state();
 23 |   let ctx: CanvasRenderingContext2D | undefined = $state();
 24 |   let videoFrameCallbackId: number | undefined = $state();
 25 | 
 26 |   // ajust the throttle time to your needs
 27 |   const THROTTLE = 1000 / 120;
 28 |   let selectedDevice: string = $state("");
 29 |   let videoIsReady = $state(false);
 30 | 
 31 |   onMount(() => {
 32 |     if (canvasEl) {
 33 |       ctx = canvasEl.getContext("2d") as CanvasRenderingContext2D;
 34 |       canvasEl.width = size.width;
 35 |       canvasEl.height = size.height;
 36 |     }
 37 |   });
 38 |   $effect(() => {
 39 |     console.log(selectedDevice);
 40 |   });
 41 | 
 42 |   onDestroy(() => {
 43 |     if (videoFrameCallbackId && videoEl) {
 44 |       videoEl.cancelVideoFrameCallback(videoFrameCallbackId);
 45 |     }
 46 |   });
 47 | 
 48 |   $effect(() => {
 49 |     if (videoEl && $mediaStream) {
 50 |       videoEl.srcObject = $mediaStream;
 51 |     }
 52 |   });
 53 | 
 54 |   let lastMillis = 0;
 55 |   async function onFrameChange(now: DOMHighResTimeStamp) {
 56 |     if (!videoEl || !ctx || !canvasEl) return;
 57 | 
 58 |     if (now - lastMillis < THROTTLE) {
 59 |       videoFrameCallbackId = videoEl.requestVideoFrameCallback(onFrameChange);
 60 |       return;
 61 |     }
 62 | 
 63 |     lastMillis = now;
 64 |     const videoWidth = videoEl.videoWidth;
 65 |     const videoHeight = videoEl.videoHeight;
 66 |     // scale down video to fit canvas,  size.width, size.height
 67 |     const scale = Math.min(size.width / videoWidth, size.height / videoHeight);
 68 |     const width0 = videoWidth * scale;
 69 |     const height0 = videoHeight * scale;
 70 |     const x0 = (size.width - width0) / 2;
 71 |     const y0 = (size.height - height0) / 2;
 72 |     ctx.clearRect(0, 0, size.width, size.height);
 73 |     ctx.drawImage(videoEl, x0, y0, width0, height0);
 74 | 
 75 |     const blob = await new Promise<Blob>((resolve) => {
 76 |       canvasEl?.toBlob(
 77 |         (blob) => {
 78 |           resolve(blob as Blob);
 79 |         },
 80 |         "image/jpeg",
 81 |         1,
 82 |       );
 83 |     });
 84 |     onFrameChangeStore.set({ blob });
 85 |     videoFrameCallbackId = videoEl.requestVideoFrameCallback(onFrameChange);
 86 |   }
 87 | 
 88 |   $effect(() => {
 89 |     if (
 90 |       $mediaStreamStatus == MediaStreamStatusEnum.CONNECTED &&
 91 |       videoIsReady &&
 92 |       videoEl
 93 |     ) {
 94 |       videoFrameCallbackId = videoEl.requestVideoFrameCallback(onFrameChange);
 95 |     }
 96 |   });
 97 | 
 98 |   function toggleFullscreen() {
 99 |     if (videoIsReady && videoEl) {
100 |       if (document.fullscreenElement) {
101 |         document.exitFullscreen();
102 |       } else {
103 |         videoEl.requestFullscreen();
104 |       }
105 |     }
106 |   }
107 | </script>
108 | 
109 | <div
110 |   class="relative mx-auto max-w-lg overflow-hidden rounded-lg border border-slate-300"
111 | >
112 |   <div
113 |     class="relative z-10 flex aspect-square w-full items-center justify-center object-cover"
114 |   >
115 |     {#if $mediaDevices.length > 0}
116 |       <div class="absolute bottom-0 right-0 z-10 flex bg-slate-400/40">
117 |         <MediaListSwitcher />
118 |         <Button
119 |           onclick={toggleFullscreen}
120 |           title="Expand Fullscreen"
121 |           class="ml-auto rounded-lg p-1 text-sm text-white opacity-50 shadow-lg"
122 |         >
123 |           <Expand />
124 |         </Button>
125 |       </div>
126 |     {/if}
127 |     <video
128 |       class="pointer-events-none aspect-square w-full justify-center object-contain"
129 |       bind:this={videoEl}
130 |       onloadeddata={() => {
131 |         videoIsReady = true;
132 |       }}
133 |       playsinline
134 |       autoplay
135 |       muted
136 |       loop
137 |     ></video>
138 |     <canvas
139 |       bind:this={canvasEl}
140 |       class="absolute left-0 top-0 aspect-square w-full object-cover"
141 |     ></canvas>
142 |   </div>
143 |   <div
144 |     class="absolute left-0 top-0 flex aspect-square w-full items-center justify-center"
145 |   >
146 |     <svg
147 |       xmlns="http://www.w3.org/2000/svg"
148 |       viewBox="0 0 448 448"
149 |       class="w-40 p-5 opacity-20"
150 |     >
151 |       <path
152 |         fill="currentColor"
153 |         d="M224 256a128 128 0 1 0 0-256 128 128 0 1 0 0 256zm-45.7 48A178.3 178.3 0 0 0 0 482.3 29.7 29.7 0 0 0 29.7 512h388.6a29.7 29.7 0 0 0 29.7-29.7c0-98.5-79.8-178.3-178.3-178.3h-91.4z"
154 |       />
155 |     </svg>
156 |   </div>
157 | </div>
158 | 


--------------------------------------------------------------------------------
/frontend/src/lib/components/Warning.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   let { message = $bindable() }: { message: string } = $props();
 3 | 
 4 |   let timeout = $state(0);
 5 |   $effect(() => {
 6 |     if (message !== "") {
 7 |       console.log("message", message);
 8 |       clearTimeout(timeout);
 9 |       timeout = setTimeout(() => {
10 |         message = "";
11 |       }, 5000);
12 |     }
13 |   });
14 | </script>
15 | 
16 | {#if message}
17 |   <div role="alert" class="fixed right-0 top-0 m-4">
18 |     <button
19 |       type="button"
20 |       class="w-full"
21 |       onclick={() => (message = "")}
22 |       onkeydown={(e) => e.key === "Enter" && (message = "")}
23 |     >
24 |       <div class="rounded bg-red-800 p-4 text-white">
25 |         {message}
26 |       </div>
27 |       <div class="bar transition-all duration-500" style="width: 0;"></div>
28 |     </button>
29 |   </div>
30 | {/if}
31 | 
32 | <style lang="postcss" scoped>
33 | </style>
34 | 


--------------------------------------------------------------------------------
/frontend/src/lib/icons/aspect.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   let props = $props();
 3 | </script>
 4 | 
 5 | <svg
 6 |   xmlns="http://www.w3.org/2000/svg"
 7 |   viewBox="0 0 448 512"
 8 |   height="16px"
 9 |   {...props}
10 | >
11 |   <path
12 |     fill="currentColor"
13 |     d="M32 32C14.3 32 0 46.3 0 64v96c0 17.7 14.3 32 32 32s32-14.3 32-32V96h64c17.7 0 32-14.3 32-32s-14.3-32-32-32H32zM64 352c0-17.7-14.3-32-32-32s-32 14.3-32 32v96c0 17.7 14.3 32 32 32h96c17.7 0 32-14.3 32-32s-14.3-32-32-32H64V352zM320 32c-17.7 0-32 14.3-32 32s14.3 32 32 32h64v64c0 17.7 14.3 32 32 32s32-14.3 32-32V64c0-17.7-14.3-32-32-32H320zM448 352c0-17.7-14.3-32-32-32s-32 14.3-32 32v64H320c-17.7 0-32 14.3-32 32s14.3 32 32 32h96c17.7 0 32-14.3 32-32V352z"
14 |   />
15 | </svg>
16 | 


--------------------------------------------------------------------------------
/frontend/src/lib/icons/expand.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   let props = $props();
 3 | </script>
 4 | 
 5 | <svg
 6 |   xmlns="http://www.w3.org/2000/svg"
 7 |   viewBox="0 0 512 512"
 8 |   height="1em"
 9 |   {...props}
10 | >
11 |   <path
12 |     fill="currentColor"
13 |     d="M.3 89.5C.1 91.6 0 93.8 0 96V224 416c0 35.3 28.7 64 64 64l384 0c35.3 0 64-28.7 64-64V224 96c0-35.3-28.7-64-64-64H64c-2.2 0-4.4 .1-6.5 .3c-9.2 .9-17.8 3.8-25.5 8.2C21.8 46.5 13.4 55.1 7.7 65.5c-3.9 7.3-6.5 15.4-7.4 24zM48 224H464l0 192c0 8.8-7.2 16-16 16L64 432c-8.8 0-16-7.2-16-16l0-192z"
14 |   />
15 | </svg>
16 | 


--------------------------------------------------------------------------------
/frontend/src/lib/icons/floppy.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   let props = $props();
 3 | </script>
 4 | 
 5 | <svg
 6 |   xmlns="http://www.w3.org/2000/svg"
 7 |   height="1em"
 8 |   viewBox="0 0 448 512"
 9 |   {...props}
10 | >
11 |   <path
12 |     fill="currentColor"
13 |     d="M48 96v320a16 16 0 0 0 16 16h320a16 16 0 0 0 16-16V170.5a16 16 0 0 0-4.7-11.3l33.9-33.9a64 64 0 0 1 18.7 45.3V416a64 64 0 0 1-64 64H64a64 64 0 0 1-64-64V96a64 64 0 0 1 64-64h245.5a64 64 0 0 1 45.3 18.7l74.5 74.5-33.9 33.9-74.6-74.4-.8-.8V184a24 24 0 0 1-24 24H104a24 24 0 0 1-24-24V80H64a16 16 0 0 0-16 16zm80-16v80h144V80H128zm32 240a64 64 0 1 1 128 0 64 64 0 1 1-128 0z"
14 |   />
15 | </svg>
16 | 


--------------------------------------------------------------------------------
/frontend/src/lib/icons/screen.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   let props = $props();
 3 | </script>
 4 | 
 5 | <svg
 6 |   xmlns="http://www.w3.org/2000/svg"
 7 |   viewBox="0 -32 576 576"
 8 |   height="16px"
 9 |   {...props}
10 | >
11 |   <path
12 |     fill="currentColor"
13 |     d="M64 0A64 64 0 0 0 0 64v288a64 64 0 0 0 64 64h176l-10.7 32H160a32 32 0 1 0 0 64h256a32 32 0 1 0 0-64h-69.3L336 416h176a64 64 0 0 0 64-64V64a64 64 0 0 0-64-64H64zm448 64v288H64V64h448z"
14 |   />
15 | </svg>
16 | 


--------------------------------------------------------------------------------
/frontend/src/lib/icons/spinner.svelte:
--------------------------------------------------------------------------------
 1 | <script lang="ts">
 2 |   let props = $props();
 3 | </script>
 4 | 
 5 | <svg
 6 |   xmlns="http://www.w3.org/2000/svg"
 7 |   height="1em"
 8 |   viewBox="0 0 512 512"
 9 |   {...props}
10 | >
11 |   <path
12 |     fill="currentColor"
13 |     d="M304 48a48 48 0 1 0 -96 0 48 48 0 1 0 96 0zm0 416a48 48 0 1 0 -96 0 48 48 0 1 0 96 0zM48 304a48 48 0 1 0 0-96 48 48 0 1 0 0 96zm464-48a48 48 0 1 0 -96 0 48 48 0 1 0 96 0zM142.9 437A48 48 0 1 0 75 369.1 48 48 0 1 0 142.9 437zm0-294.2A48 48 0 1 0 75 75a48 48 0 1 0 67.9 67.9zM369.1 437A48 48 0 1 0 437 369.1 48 48 0 1 0 369.1 437z"
14 |   />
15 | </svg>
16 | 


--------------------------------------------------------------------------------
/frontend/src/lib/index.ts:
--------------------------------------------------------------------------------
1 | // place files you want to import through the `$lib` alias in this folder.
2 | 


--------------------------------------------------------------------------------
/frontend/src/lib/lcmLive.ts:
--------------------------------------------------------------------------------
  1 | import { get, writable } from "svelte/store";
  2 | 
  3 | export enum LCMLiveStatus {
  4 |   CONNECTED = "connected",
  5 |   DISCONNECTED = "disconnected",
  6 |   CONNECTING = "connecting",
  7 |   WAIT = "wait",
  8 |   SEND_FRAME = "send_frame",
  9 |   TIMEOUT = "timeout",
 10 |   ERROR = "error",
 11 | }
 12 | 
 13 | const initStatus: LCMLiveStatus = LCMLiveStatus.DISCONNECTED;
 14 | 
 15 | export const lcmLiveStatus = writable<LCMLiveStatus>(initStatus);
 16 | export const streamId = writable<string | null>(null);
 17 | 
 18 | // WebSocket connection
 19 | let websocket: WebSocket | null;
 20 | 
 21 | // Register browser unload event listener to properly close WebSockets
 22 | if (typeof window !== "undefined") {
 23 |   window.addEventListener("beforeunload", () => {
 24 |     // Close the WebSocket properly if it exists
 25 |     if (websocket && websocket.readyState === WebSocket.OPEN) {
 26 |       websocket.close(1000, "Page unload");
 27 |     }
 28 |   });
 29 | }
 30 | export const lcmLiveActions = {
 31 |   async start(
 32 |     getSreamdata: () =>
 33 |       | [Record<string, unknown>]
 34 |       | [Record<string, unknown>, Blob],
 35 |   ) {
 36 |     return new Promise((resolve, reject) => {
 37 |       try {
 38 |         // Set connecting status immediately
 39 |         lcmLiveStatus.set(LCMLiveStatus.CONNECTING);
 40 | 
 41 |         const userId = crypto.randomUUID();
 42 |         const websocketURL = `${
 43 |           window.location.protocol === "https:" ? "wss" : "ws"
 44 |         }:${window.location.host}/api/ws/${userId}`;
 45 | 
 46 |         // Close any existing connection first
 47 |         if (websocket && websocket.readyState !== WebSocket.CLOSED) {
 48 |           websocket.close();
 49 |         }
 50 | 
 51 |         websocket = new WebSocket(websocketURL);
 52 | 
 53 |         // Set a connection timeout
 54 |         const connectionTimeout = setTimeout(() => {
 55 |           if (websocket && websocket.readyState !== WebSocket.OPEN) {
 56 |             console.error("WebSocket connection timeout");
 57 |             lcmLiveStatus.set(LCMLiveStatus.ERROR);
 58 |             streamId.set(null);
 59 |             reject(new Error("Connection timeout. Please try again."));
 60 |             websocket.close();
 61 |           }
 62 |         }, 10000); // 10 second timeout
 63 | 
 64 |         websocket.onopen = () => {
 65 |           clearTimeout(connectionTimeout);
 66 |           console.log("Connected to websocket");
 67 |         };
 68 | 
 69 |         websocket.onclose = (event) => {
 70 |           clearTimeout(connectionTimeout);
 71 |           console.log(
 72 |             `Disconnected from websocket: ${event.code} ${event.reason}`,
 73 |           );
 74 | 
 75 |           // Only change status if we're not in ERROR state (which would mean we already handled the error)
 76 |           if (get(lcmLiveStatus) !== LCMLiveStatus.ERROR) {
 77 |             lcmLiveStatus.set(LCMLiveStatus.DISCONNECTED);
 78 |           }
 79 | 
 80 |           // If connection was never established (close without open)
 81 |           if (event.code === 1006 && get(streamId) === null) {
 82 |             reject(
 83 |               new Error("Cannot connect to server. Please try again later."),
 84 |             );
 85 |           }
 86 |         };
 87 | 
 88 |         websocket.onerror = (err) => {
 89 |           clearTimeout(connectionTimeout);
 90 |           console.error("WebSocket error:", err);
 91 |           lcmLiveStatus.set(LCMLiveStatus.ERROR);
 92 |           streamId.set(null);
 93 |           reject(new Error("Connection error. Please try again."));
 94 |         };
 95 | 
 96 |         websocket.onmessage = (event) => {
 97 |           try {
 98 |             const data = JSON.parse(event.data);
 99 |             switch (data.status) {
100 |               case "connected":
101 |                 lcmLiveStatus.set(LCMLiveStatus.CONNECTED);
102 |                 streamId.set(userId);
103 |                 resolve({ status: "connected", userId });
104 |                 break;
105 |               case "send_frame":
106 |                 lcmLiveStatus.set(LCMLiveStatus.SEND_FRAME);
107 |                 try {
108 |                   const streamData = getSreamdata();
109 |                   // Send as an object, not a string, to use the proper handling in the send method
110 |                   this.send({ status: "next_frame" });
111 |                   for (const d of streamData) {
112 |                     this.send(d);
113 |                   }
114 |                 } catch (error) {
115 |                   console.error("Error sending frame data:", error);
116 |                 }
117 |                 break;
118 |               case "wait":
119 |                 lcmLiveStatus.set(LCMLiveStatus.WAIT);
120 |                 break;
121 |               case "timeout":
122 |                 console.log("Session timeout");
123 |                 lcmLiveStatus.set(LCMLiveStatus.TIMEOUT);
124 |                 streamId.set(null);
125 |                 reject(new Error("Session timeout. Please restart."));
126 |                 break;
127 |               case "error":
128 |                 console.error("Server error:", data.message);
129 |                 lcmLiveStatus.set(LCMLiveStatus.ERROR);
130 |                 streamId.set(null);
131 |                 reject(new Error(data.message || "Server error occurred"));
132 |                 break;
133 |               default:
134 |                 console.log("Unknown message status:", data.status);
135 |             }
136 |           } catch (error) {
137 |             console.error("Error handling websocket message:", error);
138 |           }
139 |         };
140 |       } catch (err) {
141 |         console.error("Error initializing websocket:", err);
142 |         lcmLiveStatus.set(LCMLiveStatus.ERROR);
143 |         streamId.set(null);
144 |         reject(err);
145 |       }
146 |     });
147 |   },
148 |   send(data: Blob | Record<string, unknown>) {
149 |     try {
150 |       if (websocket && websocket.readyState === WebSocket.OPEN) {
151 |         if (data instanceof Blob) {
152 |           websocket.send(data);
153 |         } else {
154 |           websocket.send(JSON.stringify(data));
155 |         }
156 |       } else {
157 |         const readyStateText = websocket
158 |           ? ["CONNECTING", "OPEN", "CLOSING", "CLOSED"][websocket.readyState]
159 |           : "null";
160 |         console.warn(`WebSocket not ready for sending: ${readyStateText}`);
161 | 
162 |         // If WebSocket is closed unexpectedly, set status to disconnected
163 |         if (!websocket || websocket.readyState === WebSocket.CLOSED) {
164 |           lcmLiveStatus.set(LCMLiveStatus.DISCONNECTED);
165 |           streamId.set(null);
166 |         }
167 |       }
168 |     } catch (error) {
169 |       console.error("Error sending data through WebSocket:", error);
170 |       // Handle WebSocket error by forcing disconnection
171 |       this.stop();
172 |     }
173 |   },
174 | 
175 |   async reconnect(
176 |     getSreamdata: () =>
177 |       | [Record<string, unknown>]
178 |       | [Record<string, unknown>, Blob],
179 |   ) {
180 |     try {
181 |       await this.stop();
182 |       // Small delay to ensure clean disconnection before reconnecting
183 |       await new Promise((resolve) => setTimeout(resolve, 500));
184 |       return await this.start(getSreamdata);
185 |     } catch (error) {
186 |       console.error("Reconnection failed:", error);
187 |       throw error;
188 |     }
189 |   },
190 | 
191 |   async stop() {
192 |     lcmLiveStatus.set(LCMLiveStatus.DISCONNECTED);
193 |     try {
194 |       if (websocket) {
195 |         // Only attempt to close if not already closed
196 |         if (websocket.readyState !== WebSocket.CLOSED) {
197 |           // Set up onclose handler to clean up only
198 |           websocket.onclose = () => {
199 |             console.log("WebSocket closed cleanly during stop()");
200 |           };
201 | 
202 |           // Set up onerror to be silent during intentional closure
203 |           websocket.onerror = () => {};
204 | 
205 |           websocket.close(1000, "Client initiated disconnect");
206 |         }
207 |       }
208 |     } catch (error) {
209 |       console.error("Error during WebSocket closure:", error);
210 |     } finally {
211 |       // Always clean up references
212 |       websocket = null;
213 |       streamId.set(null);
214 |     }
215 |   },
216 | };
217 | 


--------------------------------------------------------------------------------
/frontend/src/lib/mediaStream.ts:
--------------------------------------------------------------------------------
  1 | import { get, writable, type Writable } from "svelte/store";
  2 | 
  3 | const BASE_HEIGHT = 720;
  4 | export enum MediaStreamStatusEnum {
  5 |   INIT = "init",
  6 |   CONNECTED = "connected",
  7 |   DISCONNECTED = "disconnected",
  8 | }
  9 | export const onFrameChangeStore: Writable<{ blob: Blob }> = writable({
 10 |   blob: new Blob(),
 11 | });
 12 | 
 13 | export const mediaDevices = writable<MediaDeviceInfo[]>([]);
 14 | export const mediaStreamStatus = writable(MediaStreamStatusEnum.INIT);
 15 | export const mediaStream = writable<MediaStream | null>(null);
 16 | 
 17 | export const mediaStreamActions = {
 18 |   async enumerateDevices() {
 19 |     // console.log("Enumerating devices");
 20 |     await navigator.mediaDevices
 21 |       .enumerateDevices()
 22 |       .then((devices) => {
 23 |         const cameras = devices.filter(
 24 |           (device) => device.kind === "videoinput",
 25 |         );
 26 |         mediaDevices.set(cameras);
 27 |       })
 28 |       .catch((err) => {
 29 |         console.error(err);
 30 |       });
 31 |   },
 32 |   async start(mediaDevicedID?: string, aspectRatio: number = 1) {
 33 |     const constraints = {
 34 |       audio: false,
 35 |       video: {
 36 |         width: {
 37 |           ideal: BASE_HEIGHT * aspectRatio,
 38 |         },
 39 |         height: {
 40 |           ideal: BASE_HEIGHT,
 41 |         },
 42 |         deviceId: mediaDevicedID,
 43 |       },
 44 |     };
 45 | 
 46 |     await navigator.mediaDevices
 47 |       .getUserMedia(constraints)
 48 |       .then((stream) => {
 49 |         mediaStreamStatus.set(MediaStreamStatusEnum.CONNECTED);
 50 |         mediaStream.set(stream);
 51 |       })
 52 |       .catch((err) => {
 53 |         console.error(`${err.name}: ${err.message}`);
 54 |         mediaStreamStatus.set(MediaStreamStatusEnum.DISCONNECTED);
 55 |         mediaStream.set(null);
 56 |       });
 57 |   },
 58 |   async startScreenCapture() {
 59 |     const displayMediaOptions = {
 60 |       video: {
 61 |         displaySurface: "window",
 62 |       },
 63 |       audio: false,
 64 |       surfaceSwitching: "include",
 65 |     };
 66 | 
 67 |     let captureStream = null;
 68 | 
 69 |     try {
 70 |       captureStream =
 71 |         await navigator.mediaDevices.getDisplayMedia(displayMediaOptions);
 72 |       const videoTrack = captureStream.getVideoTracks()[0];
 73 | 
 74 |       console.log("Track settings:");
 75 |       console.log(JSON.stringify(videoTrack.getSettings(), null, 2));
 76 |       console.log("Track constraints:");
 77 |       console.log(JSON.stringify(videoTrack.getConstraints(), null, 2));
 78 |       mediaStreamStatus.set(MediaStreamStatusEnum.CONNECTED);
 79 |       mediaStream.set(captureStream);
 80 | 
 81 |       const capabilities = videoTrack.getCapabilities();
 82 |       const aspectRatio = capabilities.aspectRatio;
 83 |       console.log("Aspect Ratio Constraints:", aspectRatio);
 84 |     } catch (err) {
 85 |       console.error(err);
 86 |     }
 87 |   },
 88 |   async switchCamera(mediaDevicedID: string, aspectRatio: number) {
 89 |     console.log("Switching camera");
 90 |     if (get(mediaStreamStatus) !== MediaStreamStatusEnum.CONNECTED) {
 91 |       return;
 92 |     }
 93 |     const constraints = {
 94 |       audio: false,
 95 |       video: {
 96 |         width: {
 97 |           ideal: BASE_HEIGHT * aspectRatio,
 98 |         },
 99 |         height: {
100 |           ideal: BASE_HEIGHT,
101 |         },
102 |         deviceId: mediaDevicedID,
103 |       },
104 |     };
105 |     console.log("Switching camera", constraints);
106 |     await navigator.mediaDevices
107 |       .getUserMedia(constraints)
108 |       .then((stream) => {
109 |         mediaStreamStatus.set(MediaStreamStatusEnum.CONNECTED);
110 |         mediaStream.set(stream);
111 |       })
112 |       .catch((err) => {
113 |         console.error(`${err.name}: ${err.message}`);
114 |       });
115 |   },
116 |   async stop() {
117 |     navigator.mediaDevices.getUserMedia({ video: true }).then((stream) => {
118 |       stream.getTracks().forEach((track) => track.stop());
119 |     });
120 |     mediaStreamStatus.set(MediaStreamStatusEnum.DISCONNECTED);
121 |     mediaStream.set(null);
122 |   },
123 | };
124 | 


--------------------------------------------------------------------------------
/frontend/src/lib/store.ts:
--------------------------------------------------------------------------------
 1 | import {
 2 |   derived,
 3 |   get,
 4 |   writable,
 5 |   type Readable,
 6 |   type Writable,
 7 | } from "svelte/store";
 8 | 
 9 | export type PipelineValues = Record<string, string | boolean | number>;
10 | 
11 | export const pipelineValues: Writable<PipelineValues> = writable({});
12 | export const deboucedPipelineValues: Readable<PipelineValues> = derived(
13 |   pipelineValues,
14 |   ($pipelineValues, set) => {
15 |     const debounced = setTimeout(() => {
16 |       set($pipelineValues);
17 |     }, 100);
18 |     return () => clearTimeout(debounced);
19 |   },
20 | );
21 | 
22 | export const getPipelineValues = () => get(pipelineValues);
23 | 


--------------------------------------------------------------------------------
/frontend/src/lib/types.ts:
--------------------------------------------------------------------------------
 1 | export const enum FieldType {
 2 |   RANGE = "range",
 3 |   SEED = "seed",
 4 |   TEXTAREA = "textarea",
 5 |   CHECKBOX = "checkbox",
 6 |   SELECT = "select",
 7 | }
 8 | export const enum PipelineMode {
 9 |   IMAGE = "image",
10 |   VIDEO = "video",
11 |   TEXT = "text",
12 | }
13 | 
14 | export interface Fields {
15 |   [key: string]: FieldProps;
16 | }
17 | 
18 | export interface FieldProps {
19 |   default: number | string;
20 |   max?: number;
21 |   min?: number;
22 |   title: string;
23 |   field: FieldType;
24 |   step?: number;
25 |   disabled?: boolean;
26 |   hide?: boolean;
27 |   id: string;
28 |   values?: string[];
29 | }
30 | export interface PipelineInfo {
31 |   title: {
32 |     default: string;
33 |   };
34 |   name: string;
35 |   description: string;
36 |   input_mode: {
37 |     default: PipelineMode;
38 |   };
39 | }
40 | 


--------------------------------------------------------------------------------
/frontend/src/lib/utils.ts:
--------------------------------------------------------------------------------
 1 | import * as piexif from "piexifjs";
 2 | 
 3 | export interface IImageInfo {
 4 |   prompt?: string;
 5 |   negative_prompt?: string;
 6 |   seed?: number;
 7 |   guidance_scale?: number;
 8 | }
 9 | 
10 | export enum windowType {
11 |   image = "image",
12 | }
13 | 
14 | export function snapImage(imageEl: HTMLImageElement, info: IImageInfo) {
15 |   try {
16 |     const zeroth: { [key: string]: string | number } = {};
17 |     const exif: { [key: string]: string | number } = {};
18 |     const gps: { [key: string]: string | number } = {};
19 |     zeroth[piexif.ImageIFD.Make] = "LCM Image-to-Image ControNet";
20 |     zeroth[piexif.ImageIFD.ImageDescription] =
21 |       `prompt: ${info?.prompt} | negative_prompt: ${info?.negative_prompt} | seed: ${info?.seed} | guidance_scale: ${info?.guidance_scale}`;
22 |     zeroth[piexif.ImageIFD.Software] =
23 |       "https://github.com/radames/Real-Time-Latent-Consistency-Model";
24 |     exif[piexif.ExifIFD.DateTimeOriginal] = new Date().toISOString();
25 | 
26 |     const exifObj = { "0th": zeroth, Exif: exif, GPS: gps };
27 |     const exifBytes = piexif.dump(exifObj);
28 | 
29 |     const canvas = document.createElement("canvas");
30 |     canvas.width = imageEl.naturalWidth;
31 |     canvas.height = imageEl.naturalHeight;
32 |     const ctx = canvas.getContext("2d") as CanvasRenderingContext2D;
33 |     ctx.drawImage(imageEl, 0, 0);
34 |     const dataURL = canvas.toDataURL("image/jpeg");
35 |     const withExif = piexif.insert(exifBytes, dataURL);
36 | 
37 |     const a = document.createElement("a");
38 |     a.href = withExif;
39 |     a.download = `lcm_txt_2_img${Date.now()}.png`;
40 |     a.click();
41 |   } catch (err) {
42 |     console.log(err);
43 |   }
44 | }
45 | 
46 | export function expandWindow(streamURL: string) {
47 |   const newWindow = window.open(
48 |     "",
49 |     "_blank",
50 |     "width=1024,height=1024,scrollbars=0,resizable=1,toolbar=0,menubar=0,location=0,directories=0,status=0",
51 |   ) as Window;
52 | 
53 |   const html = `
54 |       <html>
55 |           <head>
56 |               <title>Real-Time Latent Consistency Model</title>
57 |               <style>
58 |                   body {
59 |                       margin: 0;
60 |                       padding: 0;
61 |                       background-color: black;
62 |                   }
63 |               </style>
64 |           </head>
65 |           <body>
66 |               <script>
67 |                   let isFullscreen = false;
68 |                   window.onkeydown = function(event) {
69 |                       switch (event.code) {
70 |                           case "Escape":
71 |                               window.close();
72 |                               break;
73 |                           case "Enter":
74 |                               if (isFullscreen) {
75 |                                   document.exitFullscreen();
76 |                                   isFullscreen = false;
77 |                               } else {
78 |                                   document.documentElement.requestFullscreen();
79 |                                   isFullscreen = true;
80 |                               }
81 |                               break;
82 |                       }
83 |                   }
84 |               </script>
85 |           </body>
86 |       </html>
87 |       `;
88 |   newWindow.document.write(html);
89 | 
90 |   const img = newWindow.document.createElement("img");
91 |   img.src = streamURL;
92 |   img.style.width = "100%";
93 |   img.style.height = "100%";
94 |   img.style.objectFit = "contain";
95 |   newWindow.document.body.appendChild(img);
96 | 
97 |   return newWindow;
98 | }
99 | 


--------------------------------------------------------------------------------
/frontend/src/piexifjs.d.ts:
--------------------------------------------------------------------------------
 1 | declare module "piexifjs" {
 2 |   export const ImageIFD: {
 3 |     Make: number;
 4 |     ImageDescription: number;
 5 |     Software: number;
 6 |   };
 7 |   export const ExifIFD: {
 8 |     DateTimeOriginal: number;
 9 |   };
10 |   export function dump(exifObj: Record<string, unknown>): string;
11 |   export function insert(exifBytes: string, dataURL: string): string;
12 | }
13 | 


--------------------------------------------------------------------------------
/frontend/src/routes/+layout.svelte:
--------------------------------------------------------------------------------
1 | <script lang="ts">
2 |   import "../app.css";
3 | 
4 |   let { children } = $props();
5 | </script>
6 | 
7 | {@render children()}
8 | 


--------------------------------------------------------------------------------
/frontend/src/routes/+page.svelte:
--------------------------------------------------------------------------------
  1 | <script lang="ts">
  2 |   import { onMount } from "svelte";
  3 |   import type { Fields, PipelineInfo } from "$lib/types";
  4 |   import { PipelineMode } from "$lib/types";
  5 |   import ImagePlayer from "$lib/components/ImagePlayer.svelte";
  6 |   import VideoInput from "$lib/components/VideoInput.svelte";
  7 |   import Button from "$lib/components/Button.svelte";
  8 |   import PipelineOptions from "$lib/components/PipelineOptions.svelte";
  9 |   import Spinner from "$lib/icons/spinner.svelte";
 10 |   import Warning from "$lib/components/Warning.svelte";
 11 |   import { lcmLiveStatus, lcmLiveActions, LCMLiveStatus } from "$lib/lcmLive";
 12 |   import { mediaStreamActions, onFrameChangeStore } from "$lib/mediaStream";
 13 |   import { getPipelineValues, deboucedPipelineValues } from "$lib/store";
 14 | 
 15 |   let pipelineParams: Fields | undefined = $state();
 16 |   let pipelineInfo: PipelineInfo | undefined = $state();
 17 |   let pageContent: string | undefined = $state();
 18 |   let isImageMode: boolean = $state(false);
 19 |   let maxQueueSize: number = $state(0);
 20 |   let currentQueueSize: number = $state(0);
 21 |   let disabled: boolean = $state(false);
 22 |   let queueCheckerRunning: boolean = $state(false);
 23 |   let warningMessage: string = $state("");
 24 | 
 25 |   onMount(() => {
 26 |     getSettings();
 27 |   });
 28 | 
 29 |   async function getSettings() {
 30 |     const settings = await fetch("/api/settings").then((r) => r.json());
 31 |     pipelineParams = settings.input_params.properties;
 32 |     pipelineInfo = settings.info.properties;
 33 |     isImageMode = pipelineInfo?.input_mode?.default === PipelineMode.IMAGE;
 34 |     maxQueueSize = settings.max_queue_size;
 35 |     pageContent = settings.page_content;
 36 |     toggleQueueChecker(true);
 37 |     console.log(pipelineParams);
 38 |   }
 39 |   function toggleQueueChecker(start: boolean) {
 40 |     queueCheckerRunning = start && maxQueueSize > 0;
 41 |     if (start) {
 42 |       getQueueSize();
 43 |     }
 44 |   }
 45 |   async function getQueueSize() {
 46 |     if (!queueCheckerRunning) {
 47 |       return;
 48 |     }
 49 |     const data = await fetch("/api/queue").then((r) => r.json());
 50 |     currentQueueSize = data.queue_size;
 51 |     setTimeout(getQueueSize, 10000);
 52 |   }
 53 |   function getSreamdata():
 54 |     | [Record<string, unknown>]
 55 |     | [Record<string, unknown>, Blob] {
 56 |     if (isImageMode) {
 57 |       return [getPipelineValues(), $onFrameChangeStore?.blob];
 58 |     } else {
 59 |       return [$deboucedPipelineValues];
 60 |     }
 61 |   }
 62 | 
 63 |   const isLCMRunning = $derived(
 64 |     $lcmLiveStatus !== LCMLiveStatus.DISCONNECTED &&
 65 |       $lcmLiveStatus !== LCMLiveStatus.ERROR,
 66 |   );
 67 |   const isConnecting = $derived($lcmLiveStatus === LCMLiveStatus.CONNECTING);
 68 | 
 69 |   $effect(() => {
 70 |     // Set warning messages based on lcmLiveStatus
 71 |     if ($lcmLiveStatus === LCMLiveStatus.TIMEOUT) {
 72 |       warningMessage = "Session timed out. Please try again.";
 73 |     } else if ($lcmLiveStatus === LCMLiveStatus.ERROR) {
 74 |       warningMessage = "Connection error occurred. Please try again.";
 75 |     }
 76 |   });
 77 |   async function toggleLcmLive() {
 78 |     try {
 79 |       if (!isLCMRunning) {
 80 |         if (isConnecting) {
 81 |           return; // Don't allow multiple connection attempts
 82 |         }
 83 | 
 84 |         // Clear any previous warning messages
 85 |         warningMessage = "";
 86 |         disabled = true;
 87 | 
 88 |         try {
 89 |           if (isImageMode) {
 90 |             await mediaStreamActions.enumerateDevices();
 91 |             await mediaStreamActions.start();
 92 |           }
 93 | 
 94 |           await lcmLiveActions.start(getSreamdata);
 95 |           toggleQueueChecker(false);
 96 |         } finally {
 97 |           // Always re-enable the button even if there was an error
 98 |           disabled = false;
 99 |         }
100 |       } else {
101 |         // Handle stopping - disable button during this process too
102 |         disabled = true;
103 | 
104 |         try {
105 |           if (isImageMode) {
106 |             mediaStreamActions.stop();
107 |           }
108 |           await lcmLiveActions.stop();
109 |           toggleQueueChecker(true);
110 |         } finally {
111 |           disabled = false;
112 |         }
113 |       }
114 |     } catch (e) {
115 |       console.error("Error in toggleLcmLive:", e);
116 |       warningMessage =
117 |         e instanceof Error ? e.message : "An unknown error occurred";
118 |       disabled = false;
119 |       toggleQueueChecker(true);
120 |     }
121 |   }
122 | 
123 |   // Reconnect function for automatic reconnection
124 |   async function reconnect() {
125 |     try {
126 |       disabled = true;
127 |       warningMessage = "Reconnecting...";
128 | 
129 |       if (isImageMode) {
130 |         await mediaStreamActions.stop();
131 |         await mediaStreamActions.enumerateDevices();
132 |         await mediaStreamActions.start();
133 |       }
134 | 
135 |       await lcmLiveActions.reconnect(getSreamdata);
136 |       warningMessage = "";
137 |       toggleQueueChecker(false);
138 |     } catch (e) {
139 |       warningMessage = e instanceof Error ? e.message : "Reconnection failed";
140 |       toggleQueueChecker(true);
141 |     } finally {
142 |       disabled = false;
143 |     }
144 |   }
145 | </script>
146 | 
147 | <svelte:head>
148 |   <script
149 |     src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.9/iframeResizer.contentWindow.min.js"
150 |   ></script>
151 | </svelte:head>
152 | 
153 | <main class="container mx-auto flex max-w-5xl flex-col gap-3 px-4 py-4">
154 |   <Warning bind:message={warningMessage}></Warning>
155 |   <article class="text-center">
156 |     {#if pageContent}
157 |       <!-- eslint-disable-next-line svelte/no-at-html-tags -->
158 |       {@html pageContent}
159 |     {/if}
160 |     {#if maxQueueSize > 0}
161 |       <p class="text-sm">
162 |         There are <span id="queue_size" class="font-bold"
163 |           >{currentQueueSize}</span
164 |         >
165 |         user(s) sharing the same GPU, affecting real-time performance. Maximum queue
166 |         size is {maxQueueSize}.
167 |         <a
168 |           href="https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model?duplicate=true"
169 |           target="_blank"
170 |           class="text-blue-500 underline hover:no-underline">Duplicate</a
171 |         > and run it on your own GPU.
172 |       </p>
173 |     {/if}
174 | 
175 |     {#if $lcmLiveStatus === LCMLiveStatus.ERROR}
176 |       <p class="mt-2 text-sm">
177 |         <button
178 |           class="text-blue-500 underline hover:no-underline"
179 |           onclick={reconnect}
180 |           {disabled}
181 |         >
182 |           Try reconnecting
183 |         </button>
184 |       </p>
185 |     {/if}
186 |   </article>
187 |   {#if pipelineParams}
188 |     <article class="my-3 grid grid-cols-1 gap-3 sm:grid-cols-4">
189 |       {#if isImageMode}
190 |         <div class="col-span-2 sm:col-start-1">
191 |           <VideoInput
192 |             width={Number(pipelineParams.width.default)}
193 |             height={Number(pipelineParams.height.default)}
194 |           ></VideoInput>
195 |         </div>
196 |       {/if}
197 |       <div class={isImageMode ? "col-span-2 sm:col-start-3" : "col-span-4"}>
198 |         <ImagePlayer />
199 |       </div>
200 |       <div class="sm:col-span-4 sm:row-start-2">
201 |         <Button onclick={toggleLcmLive} {disabled} class="my-1 p-2 text-lg">
202 |           {#if isConnecting}
203 |             Connecting...
204 |           {:else if isLCMRunning}
205 |             Stop
206 |           {:else}
207 |             Start
208 |           {/if}
209 |         </Button>
210 |         <PipelineOptions {pipelineParams}></PipelineOptions>
211 |       </div>
212 |     </article>
213 |   {:else}
214 |     <!-- loading -->
215 |     <div class="flex items-center justify-center gap-3 py-48 text-2xl">
216 |       <Spinner class="animate-spin opacity-50"></Spinner>
217 |       <p>Loading...</p>
218 |     </div>
219 |   {/if}
220 | </main>
221 | 
222 | <style lang="postcss">
223 |   @reference "tailwindcss";
224 |   :global(html) {
225 |     @apply text-black dark:bg-gray-900 dark:text-white;
226 |   }
227 | </style>
228 | 


--------------------------------------------------------------------------------
/frontend/src/routes/+page.ts:
--------------------------------------------------------------------------------
1 | export const prerender = true;
2 | 


--------------------------------------------------------------------------------
/frontend/static/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/frontend/static/favicon.png


--------------------------------------------------------------------------------
/frontend/svelte.config.js:
--------------------------------------------------------------------------------
 1 | import adapter from "@sveltejs/adapter-static";
 2 | import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
 3 | 
 4 | const config = {
 5 |   preprocess: vitePreprocess(),
 6 |   kit: {
 7 |     adapter: adapter({
 8 |       pages: "public",
 9 |       assets: "public",
10 |       fallback: undefined,
11 |       precompress: false,
12 |       strict: true,
13 |     }),
14 |   },
15 | };
16 | 
17 | export default config;
18 | 


--------------------------------------------------------------------------------
/frontend/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | export default {
3 |   content: ["./src/**/*.{html,js,svelte,ts}"],
4 |   theme: {
5 |     extend: {},
6 |   },
7 |   plugins: [import("@tailwindcss/typography")],
8 | };
9 | 


--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"extends": "./.svelte-kit/tsconfig.json",
 3 | 	"compilerOptions": {
 4 | 		"allowJs": true,
 5 | 		"checkJs": true,
 6 | 		"esModuleInterop": true,
 7 | 		"forceConsistentCasingInFileNames": true,
 8 | 		"resolveJsonModule": true,
 9 | 		"skipLibCheck": true,
10 | 		"sourceMap": true,
11 | 		"strict": true,
12 | 		"moduleResolution": "bundler"
13 | 	}
14 | 	// Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias
15 | 	// except $lib which is handled by https://svelte.dev/docs/kit/configuration#files
16 | 	//
17 | 	// If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes
18 | 	// from the referenced tsconfig.json - TypeScript does not merge them in
19 | }
20 | 


--------------------------------------------------------------------------------
/frontend/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { sveltekit } from "@sveltejs/kit/vite";
 2 | import tailwindcss from "@tailwindcss/vite";
 3 | import { defineConfig } from "vite";
 4 | 
 5 | export default defineConfig({
 6 |   plugins: [tailwindcss(), sveltekit()],
 7 |   server: {
 8 |     proxy: {
 9 |       "/api": "http://localhost:7860",
10 |       "/api/ws": {
11 |         target: "ws://localhost:7860",
12 |         ws: true,
13 |       },
14 |     },
15 |   },
16 | });
17 | 


--------------------------------------------------------------------------------
/qr-code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/qr-code.png


--------------------------------------------------------------------------------
/server/config.py:
--------------------------------------------------------------------------------
  1 | from pydantic import BaseModel, field_validator
  2 | import argparse
  3 | import os
  4 | from typing import Annotated
  5 | 
  6 | 
  7 | class Args(BaseModel):
  8 |     host: str
  9 |     port: int
 10 |     reload: bool
 11 |     max_queue_size: int
 12 |     timeout: float
 13 |     safety_checker: bool
 14 |     torch_compile: bool
 15 |     taesd: bool
 16 |     pipeline: str
 17 |     ssl_certfile: str | None
 18 |     ssl_keyfile: str | None
 19 |     sfast: bool
 20 |     onediff: bool = False
 21 |     compel: bool = False
 22 |     debug: bool = False
 23 |     pruna: bool = False
 24 | 
 25 |     def pretty_print(self) -> None:
 26 |         print("\n")
 27 |         for field, value in self.model_dump().items():
 28 |             print(f"{field}: {value}")
 29 |         print("\n")
 30 | 
 31 |     @field_validator("ssl_keyfile")
 32 |     @classmethod
 33 |     def validate_ssl_keyfile(cls, v: str | None, info) -> str | None:
 34 |         """Validate that if ssl_certfile is provided, ssl_keyfile is also provided."""
 35 |         ssl_certfile = info.data.get("ssl_certfile")
 36 |         if ssl_certfile and not v:
 37 |             raise ValueError(
 38 |                 "If ssl_certfile is provided, ssl_keyfile must also be provided"
 39 |             )
 40 |         return v
 41 | 
 42 | 
 43 | MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 44 | TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 45 | SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None) == "True"
 46 | TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None) == "True"
 47 | USE_TAESD = os.environ.get("USE_TAESD", "False") == "True"
 48 | default_host = os.getenv("HOST", "0.0.0.0")
 49 | default_port = int(os.getenv("PORT", "7860"))
 50 | 
 51 | parser = argparse.ArgumentParser(description="Run the app")
 52 | parser.add_argument("--host", type=str, default=default_host, help="Host address")
 53 | parser.add_argument("--port", type=int, default=default_port, help="Port number")
 54 | parser.add_argument("--reload", action="store_true", help="Reload code on change")
 55 | parser.add_argument(
 56 |     "--max-queue-size",
 57 |     dest="max_queue_size",
 58 |     type=int,
 59 |     default=MAX_QUEUE_SIZE,
 60 |     help="Max Queue Size",
 61 | )
 62 | parser.add_argument("--timeout", type=float, default=TIMEOUT, help="Timeout")
 63 | parser.add_argument(
 64 |     "--safety-checker",
 65 |     dest="safety_checker",
 66 |     action="store_true",
 67 |     default=SAFETY_CHECKER,
 68 |     help="Safety Checker",
 69 | )
 70 | parser.add_argument(
 71 |     "--torch-compile",
 72 |     dest="torch_compile",
 73 |     action="store_true",
 74 |     default=TORCH_COMPILE,
 75 |     help="Torch Compile",
 76 | )
 77 | parser.add_argument(
 78 |     "--taesd",
 79 |     dest="taesd",
 80 |     action="store_true",
 81 |     help="Use Tiny Autoencoder",
 82 | )
 83 | parser.add_argument(
 84 |     "--pipeline",
 85 |     type=str,
 86 |     default="txt2img",
 87 |     help="Pipeline to use",
 88 | )
 89 | parser.add_argument(
 90 |     "--ssl-certfile",
 91 |     dest="ssl_certfile",
 92 |     type=str,
 93 |     default=None,
 94 |     help="SSL certfile",
 95 | )
 96 | parser.add_argument(
 97 |     "--ssl-keyfile",
 98 |     dest="ssl_keyfile",
 99 |     type=str,
100 |     default=None,
101 |     help="SSL keyfile",
102 | )
103 | parser.add_argument(
104 |     "--debug",
105 |     action="store_true",
106 |     default=False,
107 |     help="Debug",
108 | )
109 | parser.add_argument(
110 |     "--compel",
111 |     action="store_true",
112 |     default=False,
113 |     help="Compel",
114 | )
115 | parser.add_argument(
116 |     "--sfast",
117 |     action="store_true",
118 |     default=False,
119 |     help="Enable Stable Fast",
120 | )
121 | parser.add_argument(
122 |     "--onediff",
123 |     action="store_true",
124 |     default=False,
125 |     help="Enable OneDiff",
126 | )
127 | parser.add_argument(
128 |     "--pruna",
129 |     action="store_true",
130 |     default=False,
131 |     help="Enable Pruna",
132 | )
133 | parser.set_defaults(taesd=USE_TAESD)
134 | 
135 | config = Args.model_validate(vars(parser.parse_args()))
136 | config.pretty_print()
137 | 


--------------------------------------------------------------------------------
/server/device.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | # check if MPS is available OSX only M1/M2/M3 chips
 4 | mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
 5 | xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()
 6 | device = torch.device(
 7 |     "cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu"
 8 | )
 9 | torch_dtype = torch.float16
10 | if mps_available:
11 |     device = torch.device("mps")
12 |     torch_dtype = torch.float32
13 | 


--------------------------------------------------------------------------------
/server/pipelines/IPcompositionHyperSD15.py:
--------------------------------------------------------------------------------
  1 | from diffusers import (
  2 |     DiffusionPipeline,
  3 |     TCDScheduler,
  4 | )
  5 | from compel import Compel
  6 | import torch
  7 | from transformers import CLIPVisionModelWithProjection
  8 | from huggingface_hub import hf_hub_download
  9 | 
 10 | try:
 11 |     import intel_extension_for_pytorch as ipex  # type: ignore
 12 | except:
 13 |     pass
 14 | 
 15 | from config import Args
 16 | from pydantic import BaseModel, Field
 17 | from util import ParamsModel
 18 | from PIL import Image
 19 | 
 20 | model_id = "runwayml/stable-diffusion-v1-5"
 21 | ip_adapter_model = "ostris/ip-composition-adapter"
 22 | file_name = "ip_plus_composition_sd15.safetensors"
 23 | 
 24 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
 25 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
 26 | page_content = """
 27 | <h1 class="text-3xl font-bold">Hyper-SD Unified + IP Adpater Composition</h1>
 28 | <h3 class="text-xl font-bold">Image-to-Image ControlNet</h3>
 29 | 
 30 | """
 31 | 
 32 | 
 33 | class Pipeline:
 34 |     class Info(BaseModel):
 35 |         name: str = "controlnet+SDXL+Turbo"
 36 |         title: str = "SDXL Turbo + Controlnet"
 37 |         description: str = "Generates an image from a text prompt"
 38 |         input_mode: str = "image"
 39 |         page_content: str = page_content
 40 | 
 41 |     class InputParams(ParamsModel):
 42 |         prompt: str = Field(
 43 |             default_prompt,
 44 |             title="Prompt",
 45 |             field="textarea",
 46 |             id="prompt",
 47 |         )
 48 |         negative_prompt: str = Field(
 49 |             default_negative_prompt,
 50 |             title="Negative Prompt",
 51 |             field="textarea",
 52 |             id="negative_prompt",
 53 |             hide=True,
 54 |         )
 55 |         seed: int = Field(
 56 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 57 |         )
 58 |         steps: int = Field(
 59 |             2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
 60 |         )
 61 |         width: int = Field(
 62 |             512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 63 |         )
 64 |         height: int = Field(
 65 |             512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 66 |         )
 67 |         guidance_scale: float = Field(
 68 |             0.0,
 69 |             min=0,
 70 |             max=10,
 71 |             step=0.001,
 72 |             title="Guidance Scale",
 73 |             field="range",
 74 |             hide=True,
 75 |             id="guidance_scale",
 76 |         )
 77 |         ip_adapter_scale: float = Field(
 78 |             0.8,
 79 |             min=0.0,
 80 |             max=1.0,
 81 |             step=0.001,
 82 |             title="IP Adapter Scale",
 83 |             field="range",
 84 |             hide=True,
 85 |             id="ip_adapter_scale",
 86 |         )
 87 |         eta: float = Field(
 88 |             1.0,
 89 |             min=0,
 90 |             max=1.0,
 91 |             step=0.001,
 92 |             title="Eta",
 93 |             field="range",
 94 |             hide=True,
 95 |             id="eta",
 96 |         )
 97 | 
 98 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
 99 |         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
100 |             "h94/IP-Adapter",
101 |             subfolder="models/image_encoder",
102 |             torch_dtype=torch.float16,
103 |         ).to(device)
104 | 
105 |         self.pipe = DiffusionPipeline.from_pretrained(
106 |             model_id,
107 |             safety_checker=None,
108 |             torch_dtype=torch_dtype,
109 |             image_encoder=image_encoder,
110 |             variant="fp16",
111 |         )
112 | 
113 |         self.pipe.load_ip_adapter(
114 |             ip_adapter_model,
115 |             subfolder="",
116 |             weight_name=[file_name],
117 |             image_encoder_folder=None,
118 |         )
119 | 
120 |         self.pipe.load_lora_weights(
121 |             hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-1step-lora.safetensors")
122 |         )
123 |         self.pipe.fuse_lora()
124 | 
125 |         self.pipe.scheduler = TCDScheduler.from_config(self.pipe.scheduler.config)
126 |         self.pipe.set_ip_adapter_scale([0.8])
127 | 
128 |         #         if args.compile:
129 |         # pipe.unet = oneflow_compile(pipe.unet, options=compile_options)
130 |         # pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=compile_options)
131 | 
132 |         if args.sfast:
133 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
134 |                 compile,
135 |                 CompilationConfig,
136 |             )
137 | 
138 |             config = CompilationConfig.Default()
139 |             # config.enable_xformers = True
140 |             config.enable_triton = True
141 |             config.enable_cuda_graph = True
142 |             # cofig.
143 |             self.pipe = compile(self.pipe, config=config)
144 | 
145 |         self.pipe.set_progress_bar_config(disable=True)
146 |         self.pipe.to(device=device)
147 |         if device.type != "mps":
148 |             self.pipe.unet.to(memory_format=torch.channels_last)
149 | 
150 |         if args.compel:
151 |             self.compel_proc = Compel(
152 |                 tokenizer=self.pipe.tokenizer,
153 |                 text_encoder=self.pipe.text_encoder,
154 |                 truncate_long_prompts=False,
155 |             )
156 | 
157 |         if args.torch_compile:
158 |             self.pipe.unet = torch.compile(
159 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
160 |             )
161 |             self.pipe.vae = torch.compile(
162 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
163 |             )
164 |             self.pipe(
165 |                 prompt="warmup",
166 |                 image=[Image.new("RGB", (768, 768))],
167 |             )
168 | 
169 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
170 |         generator = torch.manual_seed(params.seed)
171 |         self.pipe.set_ip_adapter_scale([params.ip_adapter_scale])
172 | 
173 |         prompt_embeds = None
174 |         prompt = params.prompt
175 |         if hasattr(self, "compel_proc"):
176 |             prompt_embeds = self.compel_proc(prompt)
177 |             prompt = None
178 | 
179 |         steps = params.steps
180 | 
181 |         results = self.pipe(
182 |             prompt=prompt,
183 |             prompt_embeds=prompt_embeds,
184 |             generator=generator,
185 |             num_inference_steps=steps,
186 |             guidance_scale=params.guidance_scale,
187 |             width=params.width,
188 |             eta=params.eta,
189 |             height=params.height,
190 |             ip_adapter_image=[params.image],
191 |             output_type="pil",
192 |         )
193 | 
194 |         return results.images[0]
195 | 


--------------------------------------------------------------------------------
/server/pipelines/IPcompositionHyperSDXL.py:
--------------------------------------------------------------------------------
  1 | from diffusers import (
  2 |     StableDiffusionXLPipeline,
  3 |     AutoencoderKL,
  4 |     TCDScheduler,
  5 | )
  6 | from compel import Compel, ReturnedEmbeddingsType
  7 | import torch
  8 | from transformers import CLIPVisionModelWithProjection
  9 | from huggingface_hub import hf_hub_download
 10 | 
 11 | try:
 12 |     import intel_extension_for_pytorch as ipex  # type: ignore
 13 | except:
 14 |     pass
 15 | 
 16 | from config import Args
 17 | from pydantic import BaseModel, Field
 18 | from util import ParamsModel
 19 | from PIL import Image
 20 | 
 21 | model_id = "stabilityai/stable-diffusion-xl-base-1.0"
 22 | taesd_model = "madebyollin/taesdxl"
 23 | ip_adapter_model = "ostris/ip-composition-adapter"
 24 | file_name = "ip_plus_composition_sdxl.safetensors"
 25 | 
 26 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
 27 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
 28 | page_content = """
 29 | <h1 class="text-3xl font-bold">Hyper-SDXL Unified + IP Adpater Composition</h1>
 30 | <h3 class="text-xl font-bold">Image-to-Image ControlNet</h3>
 31 | 
 32 | """
 33 | 
 34 | 
 35 | class Pipeline:
 36 |     class Info(BaseModel):
 37 |         name: str = "controlnet+SDXL+Turbo"
 38 |         title: str = "SDXL Turbo + Controlnet"
 39 |         description: str = "Generates an image from a text prompt"
 40 |         input_mode: str = "image"
 41 |         page_content: str = page_content
 42 | 
 43 |     class InputParams(ParamsModel):
 44 |         prompt: str = Field(
 45 |             default_prompt,
 46 |             title="Prompt",
 47 |             field="textarea",
 48 |             id="prompt",
 49 |         )
 50 |         negative_prompt: str = Field(
 51 |             default_negative_prompt,
 52 |             title="Negative Prompt",
 53 |             field="textarea",
 54 |             id="negative_prompt",
 55 |             hide=True,
 56 |         )
 57 |         seed: int = Field(
 58 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 59 |         )
 60 |         steps: int = Field(
 61 |             2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
 62 |         )
 63 |         width: int = Field(
 64 |             1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 65 |         )
 66 |         height: int = Field(
 67 |             1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 68 |         )
 69 |         guidance_scale: float = Field(
 70 |             0.0,
 71 |             min=0,
 72 |             max=10,
 73 |             step=0.001,
 74 |             title="Guidance Scale",
 75 |             field="range",
 76 |             hide=True,
 77 |             id="guidance_scale",
 78 |         )
 79 |         ip_adapter_scale: float = Field(
 80 |             0.8,
 81 |             min=0.0,
 82 |             max=1.0,
 83 |             step=0.001,
 84 |             title="IP Adapter Scale",
 85 |             field="range",
 86 |             hide=True,
 87 |             id="ip_adapter_scale",
 88 |         )
 89 |         eta: float = Field(
 90 |             1.0,
 91 |             min=0,
 92 |             max=1.0,
 93 |             step=0.001,
 94 |             title="Eta",
 95 |             field="range",
 96 |             hide=True,
 97 |             id="eta",
 98 |         )
 99 | 
100 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
101 |         vae = AutoencoderKL.from_pretrained(
102 |             "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
103 |         )
104 |         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
105 |             "h94/IP-Adapter",
106 |             subfolder="models/image_encoder",
107 |             torch_dtype=torch.float16,
108 |         ).to(device)
109 | 
110 |         self.pipe = StableDiffusionXLPipeline.from_pretrained(
111 |             model_id,
112 |             safety_checker=None,
113 |             torch_dtype=torch_dtype,
114 |             vae=vae,
115 |             image_encoder=image_encoder,
116 |             variant="fp16",
117 |         )
118 |         self.pipe.load_ip_adapter(
119 |             ip_adapter_model,
120 |             subfolder="",
121 |             weight_name=[file_name],
122 |             image_encoder_folder=None,
123 |         )
124 | 
125 |         self.pipe.load_lora_weights(
126 |             hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-1step-lora.safetensors")
127 |         )
128 |         self.pipe.fuse_lora()
129 | 
130 |         self.pipe.scheduler = TCDScheduler.from_config(self.pipe.scheduler.config)
131 |         self.pipe.set_ip_adapter_scale([0.8])
132 | 
133 |         if args.sfast:
134 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
135 |                 compile,
136 |                 CompilationConfig,
137 |             )
138 | 
139 |             config = CompilationConfig.Default()
140 |             # config.enable_xformers = True
141 |             config.enable_triton = True
142 |             config.enable_cuda_graph = True
143 |             self.pipe = compile(self.pipe, config=config)
144 | 
145 |         self.pipe.set_progress_bar_config(disable=True)
146 |         self.pipe.to(device=device)
147 |         if device.type != "mps":
148 |             self.pipe.unet.to(memory_format=torch.channels_last)
149 | 
150 |         if args.compel:
151 |             self.pipe.compel_proc = Compel(
152 |                 tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
153 |                 text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
154 |                 returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
155 |                 requires_pooled=[False, True],
156 |             )
157 | 
158 |         if args.torch_compile:
159 |             self.pipe.unet = torch.compile(
160 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
161 |             )
162 |             self.pipe.vae = torch.compile(
163 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
164 |             )
165 |             self.pipe(
166 |                 prompt="warmup",
167 |                 image=[Image.new("RGB", (768, 768))],
168 |             )
169 | 
170 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
171 |         generator = torch.manual_seed(params.seed)
172 |         self.pipe.set_ip_adapter_scale([params.ip_adapter_scale])
173 | 
174 |         prompt = params.prompt
175 |         negative_prompt = params.negative_prompt
176 |         prompt_embeds = None
177 |         pooled_prompt_embeds = None
178 |         negative_prompt_embeds = None
179 |         negative_pooled_prompt_embeds = None
180 |         if hasattr(self.pipe, "compel_proc"):
181 |             _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
182 |                 [params.prompt, params.negative_prompt]
183 |             )
184 |             prompt = None
185 |             negative_prompt = None
186 |             prompt_embeds = _prompt_embeds[0:1]
187 |             pooled_prompt_embeds = pooled_prompt_embeds[0:1]
188 |             negative_prompt_embeds = _prompt_embeds[1:2]
189 |             negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
190 | 
191 |         steps = params.steps
192 | 
193 |         results = self.pipe(
194 |             prompt=prompt,
195 |             negative_prompt=negative_prompt,
196 |             prompt_embeds=prompt_embeds,
197 |             pooled_prompt_embeds=pooled_prompt_embeds,
198 |             negative_prompt_embeds=negative_prompt_embeds,
199 |             negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
200 |             generator=generator,
201 |             num_inference_steps=steps,
202 |             guidance_scale=params.guidance_scale,
203 |             width=params.width,
204 |             eta=params.eta,
205 |             height=params.height,
206 |             ip_adapter_image=[params.image],
207 |             output_type="pil",
208 |         )
209 |         return results.images[0]
210 | 


--------------------------------------------------------------------------------
/server/pipelines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/server/pipelines/__init__.py


--------------------------------------------------------------------------------
/server/pipelines/controlnetFlashSD.py:
--------------------------------------------------------------------------------
  1 | from diffusers import (
  2 |     StableDiffusionControlNetImg2ImgPipeline,
  3 |     ControlNetModel,
  4 |     AutoencoderTiny,
  5 |     LCMScheduler,
  6 | )
  7 | from compel import Compel, ReturnedEmbeddingsType
  8 | import torch
  9 | from pipelines.utils.canny_gpu import SobelOperator
 10 | 
 11 | try:
 12 |     import intel_extension_for_pytorch as ipex  # type: ignore
 13 | except:
 14 |     pass
 15 | 
 16 | from config import Args
 17 | from pydantic import BaseModel, Field
 18 | from util import ParamsModel
 19 | from PIL import Image
 20 | import math
 21 | 
 22 | controlnet_model = "lllyasviel/control_v11p_sd15_canny"
 23 | model_id = "runwayml/stable-diffusion-v1-5"
 24 | taesd_model = "madebyollin/taesd"
 25 | 
 26 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
 27 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
 28 | page_content = """
 29 | <h1 class="text-3xl font-bold">Flash-SD</h1>
 30 | <h3 class="text-xl font-bold">Image-to-Image ControlNet</h3>
 31 | 
 32 | """
 33 | 
 34 | 
 35 | class Pipeline:
 36 |     class Info(BaseModel):
 37 |         name: str = "controlnet+SDXL+Turbo"
 38 |         title: str = "SDXL Turbo + Controlnet"
 39 |         description: str = "Generates an image from a text prompt"
 40 |         input_mode: str = "image"
 41 |         page_content: str = page_content
 42 | 
 43 |     class InputParams(ParamsModel):
 44 |         prompt: str = Field(
 45 |             default_prompt,
 46 |             title="Prompt",
 47 |             field="textarea",
 48 |             id="prompt",
 49 |         )
 50 |         negative_prompt: str = Field(
 51 |             default_negative_prompt,
 52 |             title="Negative Prompt",
 53 |             field="textarea",
 54 |             id="negative_prompt",
 55 |             hide=True,
 56 |         )
 57 |         seed: int = Field(
 58 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 59 |         )
 60 |         steps: int = Field(
 61 |             2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
 62 |         )
 63 |         width: int = Field(
 64 |             512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 65 |         )
 66 |         height: int = Field(
 67 |             512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 68 |         )
 69 |         strength: float = Field(
 70 |             0.5,
 71 |             min=0.25,
 72 |             max=1.0,
 73 |             step=0.001,
 74 |             title="Strength",
 75 |             field="range",
 76 |             hide=True,
 77 |             id="strength",
 78 |         )
 79 |         controlnet_scale: float = Field(
 80 |             0.5,
 81 |             min=0,
 82 |             max=1.0,
 83 |             step=0.001,
 84 |             title="Controlnet Scale",
 85 |             field="range",
 86 |             hide=True,
 87 |             id="controlnet_scale",
 88 |         )
 89 |         controlnet_start: float = Field(
 90 |             0.0,
 91 |             min=0,
 92 |             max=1.0,
 93 |             step=0.001,
 94 |             title="Controlnet Start",
 95 |             field="range",
 96 |             hide=True,
 97 |             id="controlnet_start",
 98 |         )
 99 |         controlnet_end: float = Field(
100 |             1.0,
101 |             min=0,
102 |             max=1.0,
103 |             step=0.001,
104 |             title="Controlnet End",
105 |             field="range",
106 |             hide=True,
107 |             id="controlnet_end",
108 |         )
109 |         canny_low_threshold: float = Field(
110 |             0.31,
111 |             min=0,
112 |             max=1.0,
113 |             step=0.001,
114 |             title="Canny Low Threshold",
115 |             field="range",
116 |             hide=True,
117 |             id="canny_low_threshold",
118 |         )
119 |         canny_high_threshold: float = Field(
120 |             0.125,
121 |             min=0,
122 |             max=1.0,
123 |             step=0.001,
124 |             title="Canny High Threshold",
125 |             field="range",
126 |             hide=True,
127 |             id="canny_high_threshold",
128 |         )
129 |         debug_canny: bool = Field(
130 |             False,
131 |             title="Debug Canny",
132 |             field="checkbox",
133 |             hide=True,
134 |             id="debug_canny",
135 |         )
136 | 
137 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
138 |         controlnet_canny = ControlNetModel.from_pretrained(
139 |             controlnet_model, torch_dtype=torch_dtype
140 |         )
141 | 
142 |         self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
143 |             model_id,
144 |             safety_checker=None,
145 |             controlnet=controlnet_canny,
146 |             torch_dtype=torch_dtype,
147 |         )
148 | 
149 |         self.pipe.scheduler = LCMScheduler.from_pretrained(
150 |             model_id,
151 |             subfolder="scheduler",
152 |             timestep_spacing="trailing",
153 |         )
154 | 
155 |         if args.taesd:
156 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
157 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
158 |             )
159 |         self.pipe.load_lora_weights("jasperai/flash-sd")
160 |         self.pipe.fuse_lora()
161 | 
162 |         self.canny_torch = SobelOperator(device=device)
163 | 
164 |         if args.sfast:
165 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
166 |                 compile,
167 |                 CompilationConfig,
168 |             )
169 | 
170 |             config = CompilationConfig.Default()
171 |             # config.enable_xformers = True
172 |             config.enable_triton = True
173 |             config.enable_cuda_graph = True
174 |             self.pipe = compile(self.pipe, config=config)
175 | 
176 |         self.pipe.set_progress_bar_config(disable=True)
177 |         self.pipe.to(device=device)
178 |         if device.type != "mps":
179 |             self.pipe.unet.to(memory_format=torch.channels_last)
180 | 
181 |         if args.compel:
182 |             self.pipe.compel_proc = Compel(
183 |                 tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
184 |                 text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
185 |                 returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
186 |                 requires_pooled=[False, True],
187 |             )
188 | 
189 |         if args.torch_compile:
190 |             self.pipe.unet = torch.compile(
191 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
192 |             )
193 |             self.pipe.vae = torch.compile(
194 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
195 |             )
196 |             self.pipe(
197 |                 prompt="warmup",
198 |                 image=[Image.new("RGB", (768, 768))],
199 |                 control_image=[Image.new("RGB", (768, 768))],
200 |             )
201 | 
202 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
203 |         generator = torch.manual_seed(params.seed)
204 | 
205 |         prompt = params.prompt
206 |         negative_prompt = params.negative_prompt
207 |         prompt_embeds = None
208 |         pooled_prompt_embeds = None
209 |         negative_prompt_embeds = None
210 |         negative_pooled_prompt_embeds = None
211 |         if hasattr(self.pipe, "compel_proc"):
212 |             _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
213 |                 [params.prompt, params.negative_prompt]
214 |             )
215 |             prompt = None
216 |             negative_prompt = None
217 |             prompt_embeds = _prompt_embeds[0:1]
218 |             pooled_prompt_embeds = pooled_prompt_embeds[0:1]
219 |             negative_prompt_embeds = _prompt_embeds[1:2]
220 |             negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
221 | 
222 |         control_image = self.canny_torch(
223 |             params.image, params.canny_low_threshold, params.canny_high_threshold
224 |         )
225 |         steps = params.steps
226 |         strength = params.strength
227 |         if int(steps * strength) < 1:
228 |             steps = math.ceil(1 / max(0.10, strength))
229 | 
230 |         results = self.pipe(
231 |             image=params.image,
232 |             control_image=control_image,
233 |             prompt=prompt,
234 |             negative_prompt=negative_prompt,
235 |             prompt_embeds=prompt_embeds,
236 |             pooled_prompt_embeds=pooled_prompt_embeds,
237 |             negative_prompt_embeds=negative_prompt_embeds,
238 |             negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
239 |             generator=generator,
240 |             strength=strength,
241 |             num_inference_steps=steps,
242 |             guidance_scale=0,
243 |             width=params.width,
244 |             height=params.height,
245 |             output_type="pil",
246 |             controlnet_conditioning_scale=params.controlnet_scale,
247 |             control_guidance_start=params.controlnet_start,
248 |             control_guidance_end=params.controlnet_end,
249 |         )
250 | 
251 |         result_image = results.images[0]
252 |         if params.debug_canny:
253 |             # paste control_image on top of result_image
254 |             w0, h0 = (200, 200)
255 |             control_image = control_image.resize((w0, h0))
256 |             w1, h1 = result_image.size
257 |             result_image.paste(control_image, (w1 - w0, h1 - h0))
258 | 
259 |         return result_image
260 | 


--------------------------------------------------------------------------------
/server/pipelines/controlnetLoraSD15QRCode.py:
--------------------------------------------------------------------------------
  1 | from diffusers import (
  2 |     StableDiffusionControlNetImg2ImgPipeline,
  3 |     ControlNetModel,
  4 |     LCMScheduler,
  5 |     AutoencoderTiny,
  6 | )
  7 | from compel import Compel
  8 | import torch
  9 | 
 10 | try:
 11 |     import intel_extension_for_pytorch as ipex  # type: ignore
 12 | except:
 13 |     pass
 14 | 
 15 | import psutil
 16 | from config import Args
 17 | from pydantic import BaseModel, Field
 18 | from util import ParamsModel
 19 | from PIL import Image
 20 | import math
 21 | 
 22 | taesd_model = "madebyollin/taesd"
 23 | controlnet_model = "monster-labs/control_v1p_sd15_qrcode_monster"
 24 | base_model = "nitrosocke/mo-di-diffusion"
 25 | lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
 26 | default_prompt = "abstract art of a men with curly hair by Pablo Picasso"
 27 | page_content = """
 28 | <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model SDv1.5</h1>
 29 | <h3 class="text-xl font-bold">LCM + LoRA + Controlnet + QRCode</h3>
 30 | <p class="text-sm">
 31 |     This demo showcases
 32 |     <a
 33 |     href="https://huggingface.co/blog/lcm_lora"
 34 |     target="_blank"
 35 |     class="text-blue-500 underline hover:no-underline">LCM LoRA</a>
 36 | + ControlNet + Image to Imasge pipeline using
 37 |     <a
 38 |     href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
 39 |     target="_blank"
 40 |     class="text-blue-500 underline hover:no-underline">Diffusers</a
 41 |     > with a MJPEG stream server.
 42 | </p>
 43 | <p class="text-sm text-gray-500">
 44 |     Change the prompt to generate different images, accepts <a
 45 |     href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
 46 |     target="_blank"
 47 |     class="text-blue-500 underline hover:no-underline">Compel</a
 48 |     > syntax.
 49 | </p>
 50 | """
 51 | 
 52 | 
 53 | class Pipeline:
 54 |     class Info(BaseModel):
 55 |         name: str = "controlnet+loras+sd15"
 56 |         title: str = "LCM + LoRA + Controlnet"
 57 |         description: str = "Generates an image from a text prompt"
 58 |         input_mode: str = "image"
 59 |         page_content: str = page_content
 60 | 
 61 |     class InputParams(ParamsModel):
 62 |         prompt: str = Field(
 63 |             default_prompt,
 64 |             title="Prompt",
 65 |             field="textarea",
 66 |             id="prompt",
 67 |         )
 68 |         seed: int = Field(
 69 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 70 |         )
 71 |         steps: int = Field(
 72 |             5, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
 73 |         )
 74 |         width: int = Field(
 75 |             512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 76 |         )
 77 |         height: int = Field(
 78 |             512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 79 |         )
 80 |         guidance_scale: float = Field(
 81 |             1.0,
 82 |             min=0,
 83 |             max=2,
 84 |             step=0.001,
 85 |             title="Guidance Scale",
 86 |             field="range",
 87 |             hide=True,
 88 |             id="guidance_scale",
 89 |         )
 90 |         strength: float = Field(
 91 |             0.6,
 92 |             min=0.25,
 93 |             max=1.0,
 94 |             step=0.001,
 95 |             title="Strength",
 96 |             field="range",
 97 |             hide=True,
 98 |             id="strength",
 99 |         )
100 |         controlnet_scale: float = Field(
101 |             1.0,
102 |             min=0,
103 |             max=1.0,
104 |             step=0.001,
105 |             title="Controlnet Scale",
106 |             field="range",
107 |             hide=True,
108 |             id="controlnet_scale",
109 |         )
110 |         controlnet_start: float = Field(
111 |             0.0,
112 |             min=0,
113 |             max=1.0,
114 |             step=0.001,
115 |             title="Controlnet Start",
116 |             field="range",
117 |             hide=True,
118 |             id="controlnet_start",
119 |         )
120 |         controlnet_end: float = Field(
121 |             1.0,
122 |             min=0,
123 |             max=1.0,
124 |             step=0.001,
125 |             title="Controlnet End",
126 |             field="range",
127 |             hide=True,
128 |             id="controlnet_end",
129 |         )
130 |         blend: float = Field(
131 |             0.1,
132 |             min=0.0,
133 |             max=1.0,
134 |             step=0.001,
135 |             title="Blend",
136 |             field="range",
137 |             hide=True,
138 |             id="blend",
139 |         )
140 | 
141 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
142 |         controlnet_qrcode = ControlNetModel.from_pretrained(
143 |             controlnet_model, torch_dtype=torch_dtype, subfolder="v2"
144 |         ).to(device)
145 | 
146 |         if args.safety_checker:
147 |             self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
148 |                 base_model,
149 |                 controlnet=controlnet_qrcode,
150 |             )
151 |         else:
152 |             self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
153 |                 base_model,
154 |                 safety_checker=None,
155 |                 controlnet=controlnet_qrcode,
156 |             )
157 | 
158 |         self.control_image = Image.open("qr-code.png").convert("RGB").resize((512, 512))
159 | 
160 |         self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
161 |         self.pipe.set_progress_bar_config(disable=True)
162 |         if device.type != "mps":
163 |             self.pipe.unet.to(memory_format=torch.channels_last)
164 | 
165 |         if args.taesd:
166 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
167 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
168 |             ).to(device)
169 | 
170 |         # Load LCM LoRA
171 |         self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
172 |         self.pipe.to(device=device, dtype=torch_dtype).to(device)
173 |         if args.compel:
174 |             self.compel_proc = Compel(
175 |                 tokenizer=self.pipe.tokenizer,
176 |                 text_encoder=self.pipe.text_encoder,
177 |                 truncate_long_prompts=False,
178 |             )
179 |         if args.torch_compile:
180 |             self.pipe.unet = torch.compile(
181 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
182 |             )
183 |             self.pipe.vae = torch.compile(
184 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
185 |             )
186 |             self.pipe(
187 |                 prompt="warmup",
188 |                 image=[Image.new("RGB", (512, 512))],
189 |                 control_image=[Image.new("RGB", (512, 512))],
190 |             )
191 | 
192 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
193 |         generator = torch.manual_seed(params.seed)
194 | 
195 |         prompt = f"modern disney style {params.prompt}"
196 |         prompt_embeds = None
197 |         prompt = params.prompt
198 |         if hasattr(self, "compel_proc"):
199 |             prompt_embeds = self.compel_proc(prompt)
200 |             prompt = None
201 | 
202 |         steps = params.steps
203 |         strength = params.strength
204 |         if int(steps * strength) < 1:
205 |             steps = math.ceil(1 / max(0.10, strength))
206 | 
207 |         blend_qr_image = Image.blend(
208 |             params.image, self.control_image, alpha=params.blend
209 |         )
210 |         results = self.pipe(
211 |             image=blend_qr_image,
212 |             control_image=self.control_image,
213 |             prompt=prompt,
214 |             prompt_embeds=prompt_embeds,
215 |             generator=generator,
216 |             strength=strength,
217 |             num_inference_steps=steps,
218 |             guidance_scale=params.guidance_scale,
219 |             width=params.width,
220 |             height=params.height,
221 |             output_type="pil",
222 |             controlnet_conditioning_scale=params.controlnet_scale,
223 |             control_guidance_start=params.controlnet_start,
224 |             control_guidance_end=params.controlnet_end,
225 |         )
226 | 
227 |         return results.images[0]
228 | 


--------------------------------------------------------------------------------
/server/pipelines/controlnetPCMSD15.py:
--------------------------------------------------------------------------------
  1 | from diffusers import (
  2 |     StableDiffusionControlNetImg2ImgPipeline,
  3 |     ControlNetModel,
  4 |     TCDScheduler,
  5 |     AutoencoderTiny,
  6 | )
  7 | from compel import Compel
  8 | import torch
  9 | from pipelines.utils.canny_gpu import SobelOperator
 10 | 
 11 | try:
 12 |     import intel_extension_for_pytorch as ipex  # type: ignore
 13 | except:
 14 |     pass
 15 | 
 16 | from config import Args
 17 | from pydantic import BaseModel, Field
 18 | from util import ParamsModel
 19 | from PIL import Image
 20 | 
 21 | taesd_model = "madebyollin/taesd"
 22 | controlnet_model = "lllyasviel/control_v11p_sd15_canny"
 23 | base_model_id = "runwayml/stable-diffusion-v1-5"
 24 | pcm_base = "wangfuyun/PCM_Weights"
 25 | pcm_lora_ckpts = {
 26 |     "2-Step": ["pcm_sd15_smallcfg_2step_converted.safetensors", 2, 0.0],
 27 |     "4-Step": ["pcm_sd15_smallcfg_4step_converted.safetensors", 4, 0.0],
 28 |     "8-Step": ["pcm_sd15_smallcfg_8step_converted.safetensors", 8, 0.0],
 29 |     "16-Step": ["pcm_sd15_smallcfg_16step_converted.safetensors", 16, 0.0],
 30 |     "Normal CFG 4-Step": ["pcm_sd15_normalcfg_4step_converted.safetensors", 4, 7.5],
 31 |     "Normal CFG 8-Step": ["pcm_sd15_normalcfg_8step_converted.safetensors", 8, 7.5],
 32 |     "Normal CFG 16-Step": ["pcm_sd15_normalcfg_16step_converted.safetensors", 16, 7.5],
 33 | }
 34 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
 35 | page_content = """
 36 | 
 37 | """
 38 | 
 39 | 
 40 | class Pipeline:
 41 |     class Info(BaseModel):
 42 |         name: str = "controlnet+loras+sd15"
 43 |         title: str = "LCM + LoRA + Controlnet"
 44 |         description: str = "Generates an image from a text prompt"
 45 |         input_mode: str = "image"
 46 |         page_content: str = page_content
 47 | 
 48 |     class InputParams(ParamsModel):
 49 |         prompt: str = Field(
 50 |             default_prompt,
 51 |             title="Prompt",
 52 |             field="textarea",
 53 |             id="prompt",
 54 |         )
 55 |         lora_ckpt_id: str = Field(
 56 |             "4-Step",
 57 |             title="PCM Base Model",
 58 |             values=list(pcm_lora_ckpts.keys()),
 59 |             field="select",
 60 |             id="lora_ckpt_id",
 61 |         )
 62 |         seed: int = Field(
 63 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 64 |         )
 65 |         width: int = Field(
 66 |             512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 67 |         )
 68 |         height: int = Field(
 69 |             512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 70 |         )
 71 |         strength: float = Field(
 72 |             0.5,
 73 |             min=0.25,
 74 |             max=1.0,
 75 |             step=0.001,
 76 |             title="Strength",
 77 |             field="range",
 78 |             hide=True,
 79 |             id="strength",
 80 |         )
 81 |         controlnet_scale: float = Field(
 82 |             0.8,
 83 |             min=0,
 84 |             max=1.0,
 85 |             step=0.001,
 86 |             title="Controlnet Scale",
 87 |             field="range",
 88 |             hide=True,
 89 |             id="controlnet_scale",
 90 |         )
 91 |         controlnet_start: float = Field(
 92 |             0.0,
 93 |             min=0,
 94 |             max=1.0,
 95 |             step=0.001,
 96 |             title="Controlnet Start",
 97 |             field="range",
 98 |             hide=True,
 99 |             id="controlnet_start",
100 |         )
101 |         controlnet_end: float = Field(
102 |             1.0,
103 |             min=0,
104 |             max=1.0,
105 |             step=0.001,
106 |             title="Controlnet End",
107 |             field="range",
108 |             hide=True,
109 |             id="controlnet_end",
110 |         )
111 |         canny_low_threshold: float = Field(
112 |             0.31,
113 |             min=0,
114 |             max=1.0,
115 |             step=0.001,
116 |             title="Canny Low Threshold",
117 |             field="range",
118 |             hide=True,
119 |             id="canny_low_threshold",
120 |         )
121 |         canny_high_threshold: float = Field(
122 |             0.125,
123 |             min=0,
124 |             max=1.0,
125 |             step=0.001,
126 |             title="Canny High Threshold",
127 |             field="range",
128 |             hide=True,
129 |             id="canny_high_threshold",
130 |         )
131 |         debug_canny: bool = Field(
132 |             False,
133 |             title="Debug Canny",
134 |             field="checkbox",
135 |             hide=True,
136 |             id="debug_canny",
137 |         )
138 | 
139 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
140 |         controlnet_canny = ControlNetModel.from_pretrained(
141 |             controlnet_model, torch_dtype=torch_dtype
142 |         ).to(device)
143 | 
144 |         self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
145 |             base_model_id,
146 |             safety_checker=None,
147 |             controlnet=controlnet_canny,
148 |         )
149 | 
150 |         self.canny_torch = SobelOperator(device=device)
151 | 
152 |         self.pipe.scheduler = TCDScheduler(
153 |             num_train_timesteps=1000,
154 |             beta_start=0.00085,
155 |             beta_end=0.012,
156 |             beta_schedule="scaled_linear",
157 |             timestep_spacing="trailing",
158 |         )
159 | 
160 |         self.pipe.set_progress_bar_config(disable=True)
161 |         if device.type != "mps":
162 |             self.pipe.unet.to(memory_format=torch.channels_last)
163 | 
164 |         if args.taesd:
165 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
166 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
167 |             ).to(device)
168 | 
169 |         self.loaded_lora = "4-Step"
170 |         self.pipe.load_lora_weights(
171 |             pcm_base,
172 |             weight_name=pcm_lora_ckpts[self.loaded_lora][0],
173 |             subfolder="sd15",
174 |         )
175 |         self.pipe.to(device=device, dtype=torch_dtype).to(device)
176 |         if args.compel:
177 |             self.compel_proc = Compel(
178 |                 tokenizer=self.pipe.tokenizer,
179 |                 text_encoder=self.pipe.text_encoder,
180 |                 truncate_long_prompts=False,
181 |             )
182 |         if args.torch_compile:
183 |             self.pipe.unet = torch.compile(
184 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
185 |             )
186 |             self.pipe.vae = torch.compile(
187 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
188 |             )
189 |             self.pipe(
190 |                 prompt="warmup",
191 |                 image=[Image.new("RGB", (768, 768))],
192 |                 control_image=[Image.new("RGB", (768, 768))],
193 |             )
194 | 
195 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
196 |         generator = torch.manual_seed(params.seed)
197 |         guidance_scale = pcm_lora_ckpts[params.lora_ckpt_id][2]
198 |         steps = pcm_lora_ckpts[params.lora_ckpt_id][1]
199 | 
200 |         if self.loaded_lora != params.lora_ckpt_id:
201 |             checkpoint = pcm_lora_ckpts[params.lora_ckpt_id][0]
202 |             self.pipe.load_lora_weights(
203 |                 pcm_base,
204 |                 weight_name=checkpoint,
205 |                 subfolder="sd15",
206 |             )
207 |             self.loaded_lora = params.lora_ckpt_id
208 | 
209 |         prompt_embeds = None
210 |         prompt = params.prompt
211 |         if hasattr(self, "compel_proc"):
212 |             prompt_embeds = self.compel_proc(prompt)
213 |             prompt = None
214 |         control_image = self.canny_torch(
215 |             params.image, params.canny_low_threshold, params.canny_high_threshold
216 |         )
217 |         strength = params.strength
218 | 
219 |         results = self.pipe(
220 |             image=params.image,
221 |             control_image=control_image,
222 |             prompt=prompt,
223 |             prompt_embeds=prompt_embeds,
224 |             generator=generator,
225 |             strength=strength,
226 |             num_inference_steps=steps,
227 |             guidance_scale=guidance_scale,
228 |             width=params.width,
229 |             height=params.height,
230 |             output_type="pil",
231 |             controlnet_conditioning_scale=params.controlnet_scale,
232 |             control_guidance_start=params.controlnet_start,
233 |             control_guidance_end=params.controlnet_end,
234 |         )
235 | 
236 |         result_image = results.images[0]
237 |         if params.debug_canny:
238 |             # paste control_image on top of result_image
239 |             w0, h0 = (200, 200)
240 |             control_image = control_image.resize((w0, h0))
241 |             w1, h1 = result_image.size
242 |             result_image.paste(control_image, (w1 - w0, h1 - h0))
243 | 
244 |         return result_image
245 | 


--------------------------------------------------------------------------------
/server/pipelines/img2img.py:
--------------------------------------------------------------------------------
  1 | from diffusers import (
  2 |     AutoPipelineForImage2Image,
  3 |     AutoencoderTiny,
  4 | )
  5 | from compel import Compel
  6 | import torch
  7 | 
  8 | try:
  9 |     import intel_extension_for_pytorch as ipex  # type: ignore
 10 | except:
 11 |     pass
 12 | 
 13 | import psutil
 14 | from config import Args
 15 | from pydantic import BaseModel, Field
 16 | from PIL import Image
 17 | from util import ParamsModel
 18 | import math
 19 | 
 20 | base_model = "SimianLuo/LCM_Dreamshaper_v7"
 21 | taesd_model = "madebyollin/taesd"
 22 | 
 23 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
 24 | page_content = """
 25 | <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
 26 | <h3 class="text-xl font-bold">Image-to-Image LCM</h3>
 27 | <p class="text-sm">
 28 |     This demo showcases
 29 |     <a
 30 |     href="https://huggingface.co/blog/lcm_lora"
 31 |     target="_blank"
 32 |     class="text-blue-500 underline hover:no-underline">LCM</a>
 33 | Image to Image pipeline using
 34 |     <a
 35 |     href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
 36 |     target="_blank"
 37 |     class="text-blue-500 underline hover:no-underline">Diffusers</a
 38 |     > with a MJPEG stream server.
 39 | </p>
 40 | <p class="text-sm text-gray-500">
 41 |     Change the prompt to generate different images, accepts <a
 42 |     href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
 43 |     target="_blank"
 44 |     class="text-blue-500 underline hover:no-underline">Compel</a
 45 |     > syntax.
 46 | </p>
 47 | """
 48 | 
 49 | 
 50 | class Pipeline:
 51 |     class Info(BaseModel):
 52 |         name: str = "img2img"
 53 |         title: str = "Image-to-Image LCM"
 54 |         description: str = "Generates an image from a text prompt"
 55 |         input_mode: str = "image"
 56 |         page_content: str = page_content
 57 | 
 58 |     class InputParams(ParamsModel):
 59 |         prompt: str = Field(
 60 |             default_prompt,
 61 |             title="Prompt",
 62 |             field="textarea",
 63 |             id="prompt",
 64 |         )
 65 |         seed: int = Field(
 66 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 67 |         )
 68 |         steps: int = Field(
 69 |             4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
 70 |         )
 71 |         width: int = Field(
 72 |             768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 73 |         )
 74 |         height: int = Field(
 75 |             768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 76 |         )
 77 |         guidance_scale: float = Field(
 78 |             0.2,
 79 |             min=0,
 80 |             max=20,
 81 |             step=0.001,
 82 |             title="Guidance Scale",
 83 |             field="range",
 84 |             hide=True,
 85 |             id="guidance_scale",
 86 |         )
 87 |         strength: float = Field(
 88 |             0.5,
 89 |             min=0.25,
 90 |             max=1.0,
 91 |             step=0.001,
 92 |             title="Strength",
 93 |             field="range",
 94 |             hide=True,
 95 |             id="strength",
 96 |         )
 97 | 
 98 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
 99 |         self.pipe = AutoPipelineForImage2Image.from_pretrained(
100 |             base_model,
101 |             safety_checker=None,
102 |         )
103 |         if args.taesd:
104 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
105 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
106 |             ).to(device)
107 | 
108 |         if args.sfast:
109 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
110 |                 compile,
111 |                 CompilationConfig,
112 |             )
113 | 
114 |             config = CompilationConfig.Default()
115 |             config.enable_xformers = True
116 |             config.enable_triton = True
117 |             config.enable_cuda_graph = True
118 |             self.pipe = compile(self.pipe, config=config)
119 | 
120 |         self.pipe.set_progress_bar_config(disable=True)
121 |         self.pipe.to(device=device, dtype=torch_dtype)
122 |         if device.type != "mps":
123 |             self.pipe.unet.to(memory_format=torch.channels_last)
124 | 
125 |         if args.torch_compile:
126 |             print("Running torch compile")
127 |             self.pipe.unet = torch.compile(
128 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
129 |             )
130 |             self.pipe.vae = torch.compile(
131 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
132 |             )
133 | 
134 |             self.pipe(
135 |                 prompt="warmup",
136 |                 image=[Image.new("RGB", (768, 768))],
137 |             )
138 | 
139 |         if args.compel:
140 |             self.compel_proc = Compel(
141 |                 tokenizer=self.pipe.tokenizer,
142 |                 text_encoder=self.pipe.text_encoder,
143 |                 truncate_long_prompts=False,
144 |             )
145 | 
146 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
147 |         generator = torch.manual_seed(params.seed)
148 |         prompt_embeds = None
149 |         prompt = params.prompt
150 |         if hasattr(self, "compel_proc"):
151 |             prompt_embeds = self.compel_proc(params.prompt)
152 |             prompt = None
153 | 
154 |         steps = params.steps
155 |         strength = params.strength
156 |         if int(steps * strength) < 1:
157 |             steps = math.ceil(1 / max(0.10, strength))
158 | 
159 |         results = self.pipe(
160 |             image=params.image,
161 |             prompt=prompt,
162 |             prompt_embeds=prompt_embeds,
163 |             generator=generator,
164 |             strength=strength,
165 |             num_inference_steps=steps,
166 |             guidance_scale=params.guidance_scale,
167 |             width=params.width,
168 |             height=params.height,
169 |             output_type="pil",
170 |         )
171 | 
172 |         return results.images[0]
173 | 


--------------------------------------------------------------------------------
/server/pipelines/img2imgFlux.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | 
  3 | from optimum.quanto import freeze, qfloat8, quantize
  4 | from transformers.modeling_utils import PreTrainedModel
  5 | from diffusers import AutoencoderTiny
  6 | from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
  7 | from diffusers.pipelines.flux.pipeline_flux_img2img import FluxImg2ImgPipeline
  8 | from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
  9 | from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
 10 | 
 11 | 
 12 | from pruna import smash, SmashConfig
 13 | from pruna.telemetry import set_telemetry_metrics
 14 | 
 15 | set_telemetry_metrics(False)  # disable telemetry for current session
 16 | set_telemetry_metrics(False, set_as_default=True)  # disable telemetry globally
 17 | 
 18 | 
 19 | try:
 20 |     import intel_extension_for_pytorch as ipex  # type: ignore
 21 | except:
 22 |     pass
 23 | 
 24 | import psutil
 25 | from config import Args
 26 | from pydantic import BaseModel, Field
 27 | from PIL import Image
 28 | from pathlib import Path
 29 | from util import ParamsModel
 30 | import math
 31 | import gc
 32 | 
 33 | 
 34 | # model_path = "black-forest-labs/FLUX.1-dev"
 35 | model_path = "black-forest-labs/FLUX.1-schnell"
 36 | base_model_path = "black-forest-labs/FLUX.1-schnell"
 37 | taesd_path = "madebyollin/taef1"
 38 | subfolder = "transformer"
 39 | transformer_path = model_path
 40 | models_path = Path("models")
 41 | 
 42 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
 43 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
 44 | page_content = """
 45 | <h1 class="text-3xl font-bold">Real-Time FLUX</h1>
 46 | 
 47 | """
 48 | 
 49 | 
 50 | def flush():
 51 |     torch.cuda.empty_cache()
 52 |     gc.collect()
 53 | 
 54 | 
 55 | class Pipeline:
 56 |     class Info(BaseModel):
 57 |         name: str = "img2img"
 58 |         title: str = "Image-to-Image SDXL"
 59 |         description: str = "Generates an image from a text prompt"
 60 |         input_mode: str = "image"
 61 |         page_content: str = page_content
 62 | 
 63 |     class InputParams(ParamsModel):
 64 |         prompt: str = Field(
 65 |             default_prompt,
 66 |             title="Prompt",
 67 |             field="textarea",
 68 |             id="prompt",
 69 |         )
 70 |         seed: int = Field(
 71 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 72 |         )
 73 |         steps: int = Field(
 74 |             1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
 75 |         )
 76 |         width: int = Field(
 77 |             1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 78 |         )
 79 |         height: int = Field(
 80 |             1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 81 |         )
 82 |         strength: float = Field(
 83 |             0.5,
 84 |             min=0.25,
 85 |             max=1.0,
 86 |             step=0.001,
 87 |             title="Strength",
 88 |             field="range",
 89 |             hide=True,
 90 |             id="strength",
 91 |         )
 92 |         guidance: float = Field(
 93 |             3.5,
 94 |             min=0,
 95 |             max=20,
 96 |             step=0.001,
 97 |             title="Guidance",
 98 |             hide=True,
 99 |             field="range",
100 |             id="guidance",
101 |         )
102 | 
103 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
104 |         # ckpt_path = (
105 |         #     "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf"
106 |         # )
107 |         print("Loading model")
108 | 
109 |         model_id = "black-forest-labs/FLUX.1-schnell"
110 |         model_revision = "refs/pr/1"
111 |         text_model_id = "openai/clip-vit-large-patch14"
112 |         model_data_type = torch.bfloat16
113 |         tokenizer = CLIPTokenizer.from_pretrained(
114 |             text_model_id, torch_dtype=model_data_type
115 |         )
116 |         text_encoder = CLIPTextModel.from_pretrained(
117 |             text_model_id, torch_dtype=model_data_type
118 |         )
119 | 
120 |         # 2
121 |         tokenizer_2 = T5TokenizerFast.from_pretrained(
122 |             model_id,
123 |             subfolder="tokenizer_2",
124 |             torch_dtype=model_data_type,
125 |             revision=model_revision,
126 |         )
127 |         text_encoder_2 = T5EncoderModel.from_pretrained(
128 |             model_id,
129 |             subfolder="text_encoder_2",
130 |             torch_dtype=model_data_type,
131 |             revision=model_revision,
132 |         )
133 | 
134 |         # Transformers
135 |         scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
136 |             model_id, subfolder="scheduler", revision=model_revision
137 |         )
138 |         transformer = FluxTransformer2DModel.from_pretrained(
139 |             model_id,
140 |             subfolder="transformer",
141 |             torch_dtype=model_data_type,
142 |             revision=model_revision,
143 |         )
144 | 
145 |         # VAE
146 |         # vae = AutoencoderKL.from_pretrained(
147 |         #     model_id,
148 |         #     subfolder="vae",
149 |         #     torch_dtype=model_data_type,
150 |         #     revision=model_revision,
151 |         # )
152 | 
153 |         vae = AutoencoderTiny.from_pretrained(
154 |             "madebyollin/taef1", torch_dtype=torch.bfloat16
155 |         )
156 | 
157 |         # Initialize the SmashConfig
158 |         smash_config = SmashConfig()
159 |         smash_config["quantizer"] = "quanto"
160 |         smash_config["quanto_calibrate"] = False
161 |         smash_config["quanto_weight_bits"] = "qint4"
162 |         # (
163 |         #     "qint4"  # "qfloat8"  # or "qint2", "qint4", "qint8"
164 |         # )
165 | 
166 |         transformer = smash(
167 |             model=transformer,
168 |             smash_config=smash_config,
169 |         )
170 |         text_encoder_2 = smash(
171 |             model=text_encoder_2,
172 |             smash_config=smash_config,
173 |         )
174 | 
175 |         pipe = FluxImg2ImgPipeline(
176 |             scheduler=scheduler,
177 |             text_encoder=text_encoder,
178 |             tokenizer=tokenizer,
179 |             text_encoder_2=text_encoder_2,
180 |             tokenizer_2=tokenizer_2,
181 |             vae=vae,
182 |             transformer=transformer,
183 |         )
184 | 
185 |         # if args.taesd:
186 |         #     pipe.vae = AutoencoderTiny.from_pretrained(
187 |         #         taesd_path, torch_dtype=torch.bfloat16, use_safetensors=True
188 |         #     )
189 |         # pipe.enable_model_cpu_offload()
190 |         pipe.text_encoder.to(device)
191 |         pipe.vae.to(device)
192 |         pipe.transformer.to(device)
193 |         pipe.text_encoder_2.to(device)
194 | 
195 |         # pipe.enable_model_cpu_offload()
196 |         # For added memory savings run this block, there is however a trade-off with speed.
197 |         # vae.enable_tiling()
198 |         # vae.enable_slicing()
199 |         # pipe.enable_sequential_cpu_offload()
200 | 
201 |         self.pipe = pipe
202 |         self.pipe.set_progress_bar_config(disable=True)
203 |         #     vae = AutoencoderKL.from_pretrained(
204 |         #         base_model_path, subfolder="vae", torch_dtype=torch_dtype
205 |         # )
206 | 
207 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
208 |         generator = torch.manual_seed(params.seed)
209 |         steps = params.steps
210 |         strength = params.strength
211 |         prompt = params.prompt
212 |         guidance = params.guidance
213 | 
214 |         results = self.pipe(
215 |             image=params.image,
216 |             prompt=prompt,
217 |             generator=generator,
218 |             strength=strength,
219 |             num_inference_steps=steps,
220 |             guidance_scale=guidance,
221 |             width=params.width,
222 |             height=params.height,
223 |         )
224 |         return results.images[0]
225 | 


--------------------------------------------------------------------------------
/server/pipelines/img2imgSDTurbo.py:
--------------------------------------------------------------------------------
  1 | from diffusers import (
  2 |     AutoPipelineForImage2Image,
  3 |     AutoencoderTiny,
  4 | )
  5 | import torch
  6 | 
  7 | 
  8 | from config import Args
  9 | from pydantic import BaseModel, Field
 10 | from PIL import Image
 11 | from util import ParamsModel
 12 | import math
 13 | 
 14 | from pruna import smash, SmashConfig
 15 | 
 16 | base_model = "stabilityai/sd-turbo"
 17 | taesd_model = "madebyollin/taesd"
 18 | 
 19 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
 20 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
 21 | page_content = """
 22 | <h1 class="text-3xl font-bold">Real-Time SD-Turbo</h1>
 23 | <h3 class="text-xl font-bold">Image-to-Image</h3>
 24 | <p class="text-sm">
 25 |     This demo showcases
 26 |     <a
 27 |     href="https://huggingface.co/stabilityai/sdxl-turbo"
 28 |     target="_blank"
 29 |     class="text-blue-500 underline hover:no-underline">SDXL Turbo</a>
 30 | Image to Image pipeline using
 31 |     <a
 32 |     href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
 33 |     target="_blank"
 34 |     class="text-blue-500 underline hover:no-underline">Diffusers</a
 35 |     > with a MJPEG stream server.
 36 | </p>
 37 | <p class="text-sm text-gray-500">
 38 |     Change the prompt to generate different images, accepts <a
 39 |     href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
 40 |     target="_blank"
 41 |     class="text-blue-500 underline hover:no-underline">Compel</a
 42 |     > syntax.
 43 | </p>
 44 | """
 45 | 
 46 | 
 47 | class Pipeline:
 48 |     class Info(BaseModel):
 49 |         name: str = "img2img"
 50 |         title: str = "Image-to-Image SDXL"
 51 |         description: str = "Generates an image from a text prompt"
 52 |         input_mode: str = "image"
 53 |         page_content: str = page_content
 54 | 
 55 |     class InputParams(ParamsModel):
 56 |         prompt: str = Field(
 57 |             default_prompt,
 58 |             title="Prompt",
 59 |             field="textarea",
 60 |             id="prompt",
 61 |         )
 62 |         negative_prompt: str = Field(
 63 |             default_negative_prompt,
 64 |             title="Negative Prompt",
 65 |             field="textarea",
 66 |             id="negative_prompt",
 67 |             hide=True,
 68 |         )
 69 |         seed: int = Field(
 70 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 71 |         )
 72 |         steps: int = Field(
 73 |             1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
 74 |         )
 75 |         width: int = Field(
 76 |             512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 77 |         )
 78 |         height: int = Field(
 79 |             512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 80 |         )
 81 |         strength: float = Field(
 82 |             0.5,
 83 |             min=0.25,
 84 |             max=1.0,
 85 |             step=0.001,
 86 |             title="Strength",
 87 |             field="range",
 88 |             hide=True,
 89 |             id="strength",
 90 |         )
 91 | 
 92 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
 93 |         self.pipe = AutoPipelineForImage2Image.from_pretrained(
 94 |             base_model,
 95 |             safety_checker=None,
 96 |         )
 97 |         if args.taesd:
 98 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
 99 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
100 |             ).to(device)
101 | 
102 |         if args.pruna:
103 |             # Create and smash your model
104 |             smash_config = SmashConfig()
105 |             # smash_config["cacher"] = "deepcache"
106 |             smash_config["compiler"] = "stable_fast"
107 |             self.pipe = smash(model=self.pipe, smash_config=smash_config)
108 | 
109 |         self.pipe.set_progress_bar_config(disable=True)
110 |         self.pipe.to(device=device, dtype=torch_dtype)
111 |         # if device.type != "mps":
112 |         #     self.pipe.unet.to(memory_format=torch.channels_last)
113 | 
114 |         if args.torch_compile:
115 |             print("Running torch compile")
116 |             self.pipe.unet = torch.compile(
117 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
118 |             )
119 |             self.pipe.vae = torch.compile(
120 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
121 |             )
122 | 
123 |             self.pipe(
124 |                 prompt="warmup",
125 |                 image=[Image.new("RGB", (768, 768))],
126 |             )
127 |         if args.compel:
128 |             from compel import Compel
129 | 
130 |             self.pipe.compel_proc = Compel(
131 |                 tokenizer=self.pipe.tokenizer,
132 |                 text_encoder=self.pipe.text_encoder,
133 |                 truncate_long_prompts=True,
134 |             )
135 | 
136 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
137 |         generator = torch.manual_seed(params.seed)
138 |         steps = params.steps
139 |         strength = params.strength
140 |         if int(steps * strength) < 1:
141 |             steps = math.ceil(1 / max(0.10, strength))
142 | 
143 |         prompt = params.prompt
144 |         prompt_embeds = None
145 |         if hasattr(self.pipe, "compel_proc"):
146 |             prompt_embeds = self.pipe.compel_proc(
147 |                 [params.prompt, params.negative_prompt]
148 |             )
149 |             prompt = None
150 | 
151 |         results = self.pipe(
152 |             image=params.image,
153 |             prompt_embeds=prompt_embeds,
154 |             prompt=prompt,
155 |             negative_prompt=params.negative_prompt,
156 |             generator=generator,
157 |             strength=strength,
158 |             num_inference_steps=steps,
159 |             guidance_scale=1.1,
160 |             width=params.width,
161 |             height=params.height,
162 |             output_type="pil",
163 |         )
164 | 
165 |         return results.images[0]
166 | 


--------------------------------------------------------------------------------
/server/pipelines/img2imgSDXL-Lightning.py:
--------------------------------------------------------------------------------
  1 | from diffusers import (
  2 |     AutoPipelineForImage2Image,
  3 |     AutoencoderTiny,
  4 |     AutoencoderKL,
  5 |     UNet2DConditionModel,
  6 |     EulerDiscreteScheduler,
  7 | )
  8 | from compel import Compel, ReturnedEmbeddingsType
  9 | import torch
 10 | 
 11 | try:
 12 |     import intel_extension_for_pytorch as ipex  # type: ignore
 13 | except:
 14 |     pass
 15 | 
 16 | from safetensors.torch import load_file
 17 | from huggingface_hub import hf_hub_download
 18 | from config import Args
 19 | from pydantic import BaseModel, Field
 20 | from PIL import Image
 21 | from util import ParamsModel
 22 | import math
 23 | from pruna import SmashConfig, smash
 24 | 
 25 | base = "stabilityai/stable-diffusion-xl-base-1.0"
 26 | repo = "ByteDance/SDXL-Lightning"
 27 | ckpt = "sdxl_lightning_2step_unet.safetensors"
 28 | taesd_model = "madebyollin/taesdxl"
 29 | NUM_STEPS = 2
 30 | 
 31 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
 32 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
 33 | page_content = """
 34 | <h1 class="text-3xl font-bold">Real-Time SDXL Lightning</h1>
 35 | <h3 class="text-xl font-bold">Image-to-Image</h3>
 36 | <p class="text-sm">
 37 |     This demo showcases
 38 |     <a
 39 |     href="https://huggingface.co/stabilityai/sdxl-turbo"
 40 |     target="_blank"
 41 |     class="text-blue-500 underline hover:no-underline">SDXL Turbo</a>
 42 | Image to Image pipeline using
 43 |     <a
 44 |     href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
 45 |     target="_blank"
 46 |     class="text-blue-500 underline hover:no-underline">Diffusers</a
 47 |     > with a MJPEG stream server.
 48 | </p>
 49 | <p class="text-sm text-gray-500">
 50 |     Change the prompt to generate different images, accepts <a
 51 |     href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
 52 |     target="_blank"
 53 |     class="text-blue-500 underline hover:no-underline">Compel</a
 54 |     > syntax.
 55 | </p>
 56 | """
 57 | 
 58 | 
 59 | class Pipeline:
 60 |     class Info(BaseModel):
 61 |         name: str = "img2img"
 62 |         title: str = "Image-to-Image SDXL-Lightning"
 63 |         description: str = "Generates an image from a text prompt"
 64 |         input_mode: str = "image"
 65 |         page_content: str = page_content
 66 | 
 67 |     class InputParams(ParamsModel):
 68 |         prompt: str = Field(
 69 |             default_prompt,
 70 |             title="Prompt",
 71 |             field="textarea",
 72 |             id="prompt",
 73 |         )
 74 |         negative_prompt: str = Field(
 75 |             default_negative_prompt,
 76 |             title="Negative Prompt",
 77 |             field="textarea",
 78 |             id="negative_prompt",
 79 |             hide=True,
 80 |         )
 81 |         seed: int = Field(
 82 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 83 |         )
 84 |         steps: int = Field(
 85 |             1, min=1, max=10, title="Steps", field="range", hide=True, id="steps"
 86 |         )
 87 |         width: int = Field(
 88 |             1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 89 |         )
 90 |         height: int = Field(
 91 |             1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 92 |         )
 93 |         guidance_scale: float = Field(
 94 |             0.0,
 95 |             min=0,
 96 |             max=1,
 97 |             step=0.001,
 98 |             title="Guidance Scale",
 99 |             field="range",
100 |             hide=True,
101 |             id="guidance_scale",
102 |         )
103 |         strength: float = Field(
104 |             0.5,
105 |             min=0.25,
106 |             max=1.0,
107 |             step=0.001,
108 |             title="Strength",
109 |             field="range",
110 |             hide=True,
111 |             id="strength",
112 |         )
113 | 
114 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
115 |         if args.taesd:
116 |             vae = AutoencoderTiny.from_pretrained(
117 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
118 |             )
119 |         else:
120 |             vae = AutoencoderKL.from_pretrained(
121 |                 "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
122 |             )
123 | 
124 |         unet = UNet2DConditionModel.from_config(base, subfolder="unet")
125 |         unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device.type))
126 |         self.pipe = AutoPipelineForImage2Image.from_pretrained(
127 |             base,
128 |             unet=unet,
129 |             torch_dtype=torch_dtype,
130 |             variant="fp16",
131 |             safety_checker=False,
132 |             vae=vae,
133 |         )
134 |         # Ensure sampler uses "trailing" timesteps.
135 |         self.pipe.scheduler = EulerDiscreteScheduler.from_config(
136 |             self.pipe.scheduler.config, timestep_spacing="trailing"
137 |         )
138 | 
139 |         if args.pruna:
140 |             # Create and smash your model
141 |             smash_config = SmashConfig()
142 |             smash_config["cacher"] = "deepcache"
143 |             smash_config["compiler"] = "stable_fast"
144 |             self.pipe = smash(model=self.pipe, smash_config=smash_config)
145 | 
146 |         if args.sfast:
147 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
148 |                 compile,
149 |                 CompilationConfig,
150 |             )
151 | 
152 |             config = CompilationConfig.Default()
153 |             config.enable_xformers = True
154 |             config.enable_triton = True
155 |             config.enable_cuda_graph = True
156 |             self.pipe = compile(self.pipe, config=config)
157 | 
158 |         self.pipe.set_progress_bar_config(disable=True)
159 |         self.pipe.to(device=device, dtype=torch_dtype)
160 |         if device.type != "mps":
161 |             self.pipe.unet.to(memory_format=torch.channels_last)
162 | 
163 |         if args.torch_compile:
164 |             print("Running torch compile")
165 |             self.pipe.unet = torch.compile(
166 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
167 |             )
168 |             self.pipe.vae = torch.compile(
169 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
170 |             )
171 |             self.pipe(
172 |                 prompt="warmup",
173 |                 image=[Image.new("RGB", (768, 768))],
174 |             )
175 | 
176 |         if args.compel:
177 |             self.pipe.compel_proc = Compel(
178 |                 tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
179 |                 text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
180 |                 returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
181 |                 requires_pooled=[False, True],
182 |             )
183 | 
184 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
185 |         generator = torch.manual_seed(params.seed)
186 |         prompt = params.prompt
187 |         negative_prompt = params.negative_prompt
188 |         prompt_embeds = None
189 |         pooled_prompt_embeds = None
190 |         negative_prompt_embeds = None
191 |         negative_pooled_prompt_embeds = None
192 |         if hasattr(self.pipe, "compel_proc"):
193 |             _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
194 |                 [params.prompt, params.negative_prompt]
195 |             )
196 |             prompt = None
197 |             negative_prompt = None
198 |             prompt_embeds = _prompt_embeds[0:1]
199 |             pooled_prompt_embeds = pooled_prompt_embeds[0:1]
200 |             negative_prompt_embeds = _prompt_embeds[1:2]
201 |             negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
202 | 
203 |         steps = params.steps
204 |         strength = params.strength
205 |         if int(steps * strength) < 1:
206 |             steps = math.ceil(1 / max(0.10, strength))
207 | 
208 |         results = self.pipe(
209 |             image=params.image,
210 |             prompt=prompt,
211 |             negative_prompt=negative_prompt,
212 |             prompt_embeds=prompt_embeds,
213 |             pooled_prompt_embeds=pooled_prompt_embeds,
214 |             negative_prompt_embeds=negative_prompt_embeds,
215 |             negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
216 |             generator=generator,
217 |             strength=strength,
218 |             num_inference_steps=steps,
219 |             guidance_scale=params.guidance_scale,
220 |             width=params.width,
221 |             height=params.height,
222 |             output_type="pil",
223 |         )
224 | 
225 |         return results.images[0]
226 | 


--------------------------------------------------------------------------------
/server/pipelines/img2imgSDXLTurbo.py:
--------------------------------------------------------------------------------
  1 | from diffusers import (
  2 |     AutoPipelineForImage2Image,
  3 |     AutoencoderTiny,
  4 | )
  5 | from compel import Compel, ReturnedEmbeddingsType
  6 | import torch
  7 | 
  8 | try:
  9 |     import intel_extension_for_pytorch as ipex  # type: ignore
 10 | except:
 11 |     pass
 12 | 
 13 | import psutil
 14 | from config import Args
 15 | from pydantic import BaseModel, Field
 16 | from PIL import Image
 17 | from util import ParamsModel
 18 | import math
 19 | 
 20 | from pruna import smash, SmashConfig
 21 | from pruna.telemetry import set_telemetry_metrics
 22 | 
 23 | set_telemetry_metrics(False)  # disable telemetry for current session
 24 | set_telemetry_metrics(False, set_as_default=True)  # disable telemetry globally
 25 | 
 26 | 
 27 | base_model = "stabilityai/sdxl-turbo"
 28 | taesd_model = "madebyollin/taesdxl"
 29 | 
 30 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
 31 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
 32 | page_content = """
 33 | <h1 class="text-3xl font-bold">Real-Time SDXL Turbo</h1>
 34 | <h3 class="text-xl font-bold">Image-to-Image</h3>
 35 | <p class="text-sm">
 36 |     This demo showcases
 37 |     <a
 38 |     href="https://huggingface.co/stabilityai/sdxl-turbo"
 39 |     target="_blank"
 40 |     class="text-blue-500 underline hover:no-underline">SDXL Turbo</a>
 41 | Image to Image pipeline using
 42 |     <a
 43 |     href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
 44 |     target="_blank"
 45 |     class="text-blue-500 underline hover:no-underline">Diffusers</a
 46 |     > with a MJPEG stream server.
 47 | </p>
 48 | <p class="text-sm text-gray-500">
 49 |     Change the prompt to generate different images, accepts <a
 50 |     href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
 51 |     target="_blank"
 52 |     class="text-blue-500 underline hover:no-underline">Compel</a
 53 |     > syntax.
 54 | </p>
 55 | """
 56 | 
 57 | 
 58 | class Pipeline:
 59 |     class Info(BaseModel):
 60 |         name: str = "img2img"
 61 |         title: str = "Image-to-Image SDXL"
 62 |         description: str = "Generates an image from a text prompt"
 63 |         input_mode: str = "image"
 64 |         page_content: str = page_content
 65 | 
 66 |     class InputParams(ParamsModel):
 67 |         prompt: str = Field(
 68 |             default_prompt,
 69 |             title="Prompt",
 70 |             field="textarea",
 71 |             id="prompt",
 72 |         )
 73 |         negative_prompt: str = Field(
 74 |             default_negative_prompt,
 75 |             title="Negative Prompt",
 76 |             field="textarea",
 77 |             id="negative_prompt",
 78 |             hide=True,
 79 |         )
 80 |         seed: int = Field(
 81 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 82 |         )
 83 |         steps: int = Field(
 84 |             1, min=1, max=10, title="Steps", field="range", hide=True, id="steps"
 85 |         )
 86 |         width: int = Field(
 87 |             768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 88 |         )
 89 |         height: int = Field(
 90 |             768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 91 |         )
 92 |         guidance_scale: float = Field(
 93 |             1.0,
 94 |             min=0,
 95 |             max=1,
 96 |             step=0.001,
 97 |             title="Guidance Scale",
 98 |             field="range",
 99 |             hide=True,
100 |             id="guidance_scale",
101 |         )
102 |         strength: float = Field(
103 |             0.5,
104 |             min=0.25,
105 |             max=1.0,
106 |             step=0.001,
107 |             title="Strength",
108 |             field="range",
109 |             hide=True,
110 |             id="strength",
111 |         )
112 | 
113 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
114 |         base_pipe = AutoPipelineForImage2Image.from_pretrained(
115 |             base_model,
116 |             safety_checker=None,
117 |         )
118 |         self.pipe = None
119 |         if args.taesd:
120 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
121 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
122 |             ).to(device)
123 | 
124 |         if args.sfast:
125 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
126 |                 compile,
127 |                 CompilationConfig,
128 |             )
129 | 
130 |             config = CompilationConfig.Default()
131 |             config.enable_xformers = True
132 |             config.enable_triton = True
133 |             config.enable_cuda_graph = True
134 |             self.pipe = compile(self.pipe, config=config)
135 | 
136 |         if device.type != "mps":
137 |             self.pipe.unet.to(memory_format=torch.channels_last)
138 | 
139 |         if args.pruna:
140 |             # Create and smash your model
141 |             smash_config = SmashConfig()
142 |             smash_config["cacher"] = "deepcache"
143 |             smash_config["compiler"] = "stable_fast"
144 |             self.pipe = smash(model=base_pipe, smash_config=smash_config)
145 | 
146 |         if args.torch_compile:
147 |             print("Running torch compile")
148 |             self.pipe.unet = torch.compile(
149 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
150 |             )
151 |             self.pipe.vae = torch.compile(
152 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
153 |             )
154 |             self.pipe(
155 |                 prompt="warmup",
156 |                 image=[Image.new("RGB", (768, 768))],
157 |             )
158 | 
159 |         if args.compel:
160 |             self.pipe.compel_proc = Compel(
161 |                 tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
162 |                 text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
163 |                 returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
164 |                 requires_pooled=[False, True],
165 |             )
166 | 
167 |         self.pipe.set_progress_bar_config(disable=True)
168 |         self.pipe.to(device=device, dtype=torch_dtype)
169 | 
170 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
171 |         generator = torch.manual_seed(params.seed)
172 |         prompt = params.prompt
173 |         negative_prompt = params.negative_prompt
174 |         prompt_embeds = None
175 |         pooled_prompt_embeds = None
176 |         negative_prompt_embeds = None
177 |         negative_pooled_prompt_embeds = None
178 |         if hasattr(self.pipe, "compel_proc"):
179 |             _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
180 |                 [params.prompt, params.negative_prompt]
181 |             )
182 |             prompt = None
183 |             negative_prompt = None
184 |             prompt_embeds = _prompt_embeds[0:1]
185 |             pooled_prompt_embeds = pooled_prompt_embeds[0:1]
186 |             negative_prompt_embeds = _prompt_embeds[1:2]
187 |             negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
188 | 
189 |         steps = params.steps
190 |         strength = params.strength
191 |         if int(steps * strength) < 1:
192 |             steps = math.ceil(1 / max(0.10, strength))
193 | 
194 |         results = self.pipe(
195 |             image=params.image,
196 |             prompt=prompt,
197 |             negative_prompt=negative_prompt,
198 |             prompt_embeds=prompt_embeds,
199 |             pooled_prompt_embeds=pooled_prompt_embeds,
200 |             negative_prompt_embeds=negative_prompt_embeds,
201 |             negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
202 |             generator=generator,
203 |             strength=strength,
204 |             num_inference_steps=steps,
205 |             guidance_scale=params.guidance_scale,
206 |             width=params.width,
207 |             height=params.height,
208 |             output_type="pil",
209 |         )
210 | 
211 |         return results.images[0]
212 | 


--------------------------------------------------------------------------------
/server/pipelines/img2imgSDXS512.py:
--------------------------------------------------------------------------------
  1 | from diffusers import AutoPipelineForImage2Image, AutoencoderTiny
  2 | from compel import Compel
  3 | import torch
  4 | 
  5 | try:
  6 |     import intel_extension_for_pytorch as ipex  # type: ignore
  7 | except:
  8 |     pass
  9 | 
 10 | import psutil
 11 | from config import Args
 12 | from pydantic import BaseModel, Field
 13 | from PIL import Image
 14 | from util import ParamsModel
 15 | import math
 16 | 
 17 | base_model = "IDKiro/sdxs-512-0.9"
 18 | taesd_model = "madebyollin/taesd"
 19 | 
 20 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
 21 | page_content = """
 22 | <h1 class="text-3xl font-bold">Real-Time Latent SDXS</h1>
 23 | <h3 class="text-xl font-bold">Image-to-Image SDXS</h3>
 24 | <p class="text-sm">
 25 |     This demo showcases
 26 |     <a
 27 |     href="https://huggingface.co/blog/lcm_lora"
 28 |     target="_blank"
 29 |     class="text-blue-500 underline hover:no-underline">LCM</a>
 30 | Image to Image pipeline using
 31 |     <a
 32 |     href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
 33 |     target="_blank"
 34 |     class="text-blue-500 underline hover:no-underline">Diffusers</a
 35 |     > with a MJPEG stream server.
 36 | </p>
 37 | <p class="text-sm text-gray-500">
 38 |     Change the prompt to generate different images, accepts <a
 39 |     href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
 40 |     target="_blank"
 41 |     class="text-blue-500 underline hover:no-underline">Compel</a
 42 |     > syntax.
 43 | </p>
 44 | """
 45 | 
 46 | 
 47 | class Pipeline:
 48 |     class Info(BaseModel):
 49 |         name: str = "img2img"
 50 |         title: str = "Image-to-Image SDXS"
 51 |         description: str = "Generates an image from a text prompt"
 52 |         input_mode: str = "image"
 53 |         page_content: str = page_content
 54 | 
 55 |     class InputParams(ParamsModel):
 56 |         prompt: str = Field(
 57 |             default_prompt,
 58 |             title="Prompt",
 59 |             field="textarea",
 60 |             id="prompt",
 61 |         )
 62 |         seed: int = Field(
 63 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 64 |         )
 65 |         steps: int = Field(
 66 |             1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
 67 |         )
 68 |         width: int = Field(
 69 |             512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 70 |         )
 71 |         height: int = Field(
 72 |             512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 73 |         )
 74 |         guidance_scale: float = Field(
 75 |             0.0,
 76 |             min=0,
 77 |             max=20,
 78 |             step=0.001,
 79 |             title="Guidance Scale",
 80 |             field="range",
 81 |             hide=True,
 82 |             id="guidance_scale",
 83 |         )
 84 |         strength: float = Field(
 85 |             0.5,
 86 |             min=0.25,
 87 |             max=1.0,
 88 |             step=0.001,
 89 |             title="Strength",
 90 |             field="range",
 91 |             hide=True,
 92 |             id="strength",
 93 |         )
 94 | 
 95 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
 96 |         self.pipe = AutoPipelineForImage2Image.from_pretrained(
 97 |             base_model,
 98 |             safety_checker=None,
 99 |         )
100 |         if args.taesd:
101 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
102 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
103 |             ).to(device)
104 | 
105 |         if args.sfast:
106 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
107 |                 compile,
108 |                 CompilationConfig,
109 |             )
110 | 
111 |             config = CompilationConfig.Default()
112 |             config.enable_xformers = True
113 |             config.enable_triton = True
114 |             config.enable_cuda_graph = True
115 |             self.pipe = compile(self.pipe, config=config)
116 | 
117 |         self.pipe.set_progress_bar_config(disable=True)
118 |         self.pipe.to(device=device, dtype=torch_dtype)
119 |         if device.type != "mps":
120 |             self.pipe.unet.to(memory_format=torch.channels_last)
121 | 
122 |         if args.torch_compile:
123 |             print("Running torch compile")
124 |             self.pipe.unet = torch.compile(
125 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
126 |             )
127 |             self.pipe.vae = torch.compile(
128 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
129 |             )
130 | 
131 |             self.pipe(
132 |                 prompt="warmup",
133 |                 image=[Image.new("RGB", (768, 768))],
134 |             )
135 | 
136 |         if args.compel:
137 |             self.compel_proc = Compel(
138 |                 tokenizer=self.pipe.tokenizer,
139 |                 text_encoder=self.pipe.text_encoder,
140 |                 truncate_long_prompts=False,
141 |             )
142 | 
143 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
144 |         generator = torch.manual_seed(params.seed)
145 |         prompt_embeds = None
146 |         prompt = params.prompt
147 |         if hasattr(self, "compel_proc"):
148 |             prompt_embeds = self.compel_proc(params.prompt)
149 |             prompt = None
150 | 
151 |         results = self.pipe(
152 |             image=params.image,
153 |             prompt=prompt,
154 |             prompt_embeds=prompt_embeds,
155 |             generator=generator,
156 |             strength=params.strength,
157 |             num_inference_steps=params.steps,
158 |             guidance_scale=params.guidance_scale,
159 |             width=params.width,
160 |             height=params.height,
161 |             output_type="pil",
162 |         )
163 |         return results.images[0]
164 | 


--------------------------------------------------------------------------------
/server/pipelines/img2imgSegmindVegaRT.py:
--------------------------------------------------------------------------------
  1 | from diffusers import (
  2 |     AutoPipelineForImage2Image,
  3 |     LCMScheduler,
  4 |     AutoencoderTiny,
  5 | )
  6 | from compel import Compel, ReturnedEmbeddingsType
  7 | import torch
  8 | 
  9 | try:
 10 |     import intel_extension_for_pytorch as ipex  # type: ignore
 11 | except:
 12 |     pass
 13 | 
 14 | import psutil
 15 | from config import Args
 16 | from pydantic import BaseModel, Field
 17 | from util import ParamsModel
 18 | from PIL import Image
 19 | import math
 20 | 
 21 | base_model = "segmind/Segmind-Vega"
 22 | lora_model = "segmind/Segmind-VegaRT"
 23 | taesd_model = "madebyollin/taesdxl"
 24 | 
 25 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
 26 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
 27 | page_content = """
 28 | <h1 class="text-3xl font-bold">Real-Time SegmindVegaRT</h1>
 29 | <h3 class="text-xl font-bold">Image-to-Image</h3>
 30 | <p class="text-sm">
 31 |     This demo showcases
 32 |     <a
 33 |     href="https://huggingface.co/segmind/Segmind-VegaRT"
 34 |     target="_blank"
 35 |     class="text-blue-500 underline hover:no-underline">SegmindVegaRT</a>
 36 | Image to Image pipeline using
 37 |     <a
 38 |     href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
 39 |     target="_blank"
 40 |     class="text-blue-500 underline hover:no-underline">Diffusers</a
 41 |     > with a MJPEG stream server.
 42 | </p>
 43 | <p class="text-sm text-gray-500">
 44 |     Change the prompt to generate different images, accepts <a
 45 |     href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
 46 |     target="_blank"
 47 |     class="text-blue-500 underline hover:no-underline">Compel</a
 48 |     > syntax.
 49 | </p>
 50 | """
 51 | 
 52 | 
 53 | class Pipeline:
 54 |     class Info(BaseModel):
 55 |         name: str = "img2img"
 56 |         title: str = "Image-to-Image Playground 256"
 57 |         description: str = "Generates an image from a text prompt"
 58 |         input_mode: str = "image"
 59 |         page_content: str = page_content
 60 | 
 61 |     class InputParams(ParamsModel):
 62 |         prompt: str = Field(
 63 |             default_prompt,
 64 |             title="Prompt",
 65 |             field="textarea",
 66 |             id="prompt",
 67 |         )
 68 |         negative_prompt: str = Field(
 69 |             default_negative_prompt,
 70 |             title="Negative Prompt",
 71 |             field="textarea",
 72 |             id="negative_prompt",
 73 |             hide=True,
 74 |         )
 75 |         seed: int = Field(
 76 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 77 |         )
 78 |         steps: int = Field(
 79 |             1, min=1, max=10, title="Steps", field="range", hide=True, id="steps"
 80 |         )
 81 |         width: int = Field(
 82 |             1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 83 |         )
 84 |         height: int = Field(
 85 |             1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 86 |         )
 87 |         guidance_scale: float = Field(
 88 |             0.0,
 89 |             min=0,
 90 |             max=1,
 91 |             step=0.001,
 92 |             title="Guidance Scale",
 93 |             field="range",
 94 |             hide=True,
 95 |             id="guidance_scale",
 96 |         )
 97 |         strength: float = Field(
 98 |             0.5,
 99 |             min=0.25,
100 |             max=1.0,
101 |             step=0.001,
102 |             title="Strength",
103 |             field="range",
104 |             hide=True,
105 |             id="strength",
106 |         )
107 | 
108 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
109 |         self.pipe = AutoPipelineForImage2Image.from_pretrained(
110 |             base_model,
111 |             safety_checker=None,
112 |             variant="fp16",
113 |         )
114 |         if args.taesd:
115 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
116 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
117 |             ).to(device)
118 | 
119 |         self.pipe.load_lora_weights(lora_model)
120 |         self.pipe.fuse_lora()
121 |         self.pipe.scheduler = LCMScheduler.from_pretrained(
122 |             base_model, subfolder="scheduler"
123 |         )
124 |         if args.sfast:
125 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
126 |                 compile,
127 |                 CompilationConfig,
128 |             )
129 | 
130 |             config = CompilationConfig.Default()
131 |             config.enable_xformers = True
132 |             config.enable_triton = True
133 |             config.enable_cuda_graph = True
134 |             self.pipe = compile(self.pipe, config=config)
135 | 
136 |         self.pipe.set_progress_bar_config(disable=True)
137 |         self.pipe.to(device=device, dtype=torch_dtype)
138 |         if device.type != "mps":
139 |             self.pipe.unet.to(memory_format=torch.channels_last)
140 | 
141 |         if args.torch_compile:
142 |             print("Running torch compile")
143 |             self.pipe.unet = torch.compile(
144 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=False
145 |             )
146 |             self.pipe.vae = torch.compile(
147 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=False
148 |             )
149 | 
150 |             self.pipe(
151 |                 prompt="warmup",
152 |                 image=[Image.new("RGB", (768, 768))],
153 |             )
154 |         if args.compel:
155 |             self.pipe.compel_proc = Compel(
156 |                 tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
157 |                 text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
158 |                 returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
159 |                 requires_pooled=[False, True],
160 |             )
161 | 
162 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
163 |         generator = torch.manual_seed(params.seed)
164 |         prompt = params.prompt
165 |         negative_prompt = params.negative_prompt
166 |         prompt_embeds = None
167 |         pooled_prompt_embeds = None
168 |         negative_prompt_embeds = None
169 |         negative_pooled_prompt_embeds = None
170 |         if hasattr(self.pipe, "compel_proc"):
171 |             _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
172 |                 [params.prompt, params.negative_prompt]
173 |             )
174 |             prompt = None
175 |             negative_prompt = None
176 |             prompt_embeds = _prompt_embeds[0:1]
177 |             pooled_prompt_embeds = pooled_prompt_embeds[0:1]
178 |             negative_prompt_embeds = _prompt_embeds[1:2]
179 |             negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
180 | 
181 |         steps = params.steps
182 |         strength = params.strength
183 |         if int(steps * strength) < 1:
184 |             steps = math.ceil(1 / max(0.10, strength))
185 | 
186 |         results = self.pipe(
187 |             image=params.image,
188 |             prompt=prompt,
189 |             negative_prompt=negative_prompt,
190 |             prompt_embeds=prompt_embeds,
191 |             pooled_prompt_embeds=pooled_prompt_embeds,
192 |             negative_prompt_embeds=negative_prompt_embeds,
193 |             negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
194 |             generator=generator,
195 |             strength=strength,
196 |             num_inference_steps=steps,
197 |             guidance_scale=params.guidance_scale,
198 |             width=params.width,
199 |             height=params.height,
200 |             output_type="pil",
201 |         )
202 | 
203 |         return results.images[0]
204 | 


--------------------------------------------------------------------------------
/server/pipelines/pix2pix/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radames/Real-Time-Latent-Consistency-Model/269f4347d93eb8e366e12b0f8f216c8b11262e76/server/pipelines/pix2pix/__init__.py


--------------------------------------------------------------------------------
/server/pipelines/pix2pix/model.py:
--------------------------------------------------------------------------------
 1 | # https://github.com/GaParmar/img2img-turbo/blob/main/src/model.py
 2 | from diffusers import DDPMScheduler
 3 | 
 4 | 
 5 | def make_1step_sched():
 6 |     noise_scheduler_1step = DDPMScheduler.from_pretrained(
 7 |         "stabilityai/sd-turbo", subfolder="scheduler"
 8 |     )
 9 |     noise_scheduler_1step.set_timesteps(1, device="cuda")
10 |     noise_scheduler_1step.alphas_cumprod = noise_scheduler_1step.alphas_cumprod.cuda()
11 |     return noise_scheduler_1step
12 | 
13 | 
14 | def my_vae_encoder_fwd(self, sample):
15 |     sample = self.conv_in(sample)
16 |     l_blocks = []
17 |     # down
18 |     for down_block in self.down_blocks:
19 |         l_blocks.append(sample)
20 |         sample = down_block(sample)
21 |     # middle
22 |     sample = self.mid_block(sample)
23 |     sample = self.conv_norm_out(sample)
24 |     sample = self.conv_act(sample)
25 |     sample = self.conv_out(sample)
26 |     self.current_down_blocks = l_blocks
27 |     return sample
28 | 
29 | 
30 | def my_vae_decoder_fwd(self, sample, latent_embeds=None):
31 |     sample = self.conv_in(sample)
32 |     upscale_dtype = next(iter(self.up_blocks.parameters())).dtype
33 |     # middle
34 |     sample = self.mid_block(sample, latent_embeds)
35 |     sample = sample.to(upscale_dtype)
36 |     if not self.ignore_skip:
37 |         skip_convs = [
38 |             self.skip_conv_1,
39 |             self.skip_conv_2,
40 |             self.skip_conv_3,
41 |             self.skip_conv_4,
42 |         ]
43 |         # up
44 |         for idx, up_block in enumerate(self.up_blocks):
45 |             skip_in = skip_convs[idx](self.incoming_skip_acts[::-1][idx] * self.gamma)
46 |             # add skip
47 |             sample = sample + skip_in
48 |             sample = up_block(sample, latent_embeds)
49 |     else:
50 |         for idx, up_block in enumerate(self.up_blocks):
51 |             sample = up_block(sample, latent_embeds)
52 |     # post-process
53 |     if latent_embeds is None:
54 |         sample = self.conv_norm_out(sample)
55 |     else:
56 |         sample = self.conv_norm_out(sample, latent_embeds)
57 |     sample = self.conv_act(sample)
58 |     sample = self.conv_out(sample)
59 |     return sample
60 | 


--------------------------------------------------------------------------------
/server/pipelines/pix2pix/pix2pix_turbo.py:
--------------------------------------------------------------------------------
  1 | # https://github.com/GaParmar/img2img-turbo/blob/main/src/pix2pix_turbo.py
  2 | import os
  3 | import requests
  4 | import sys
  5 | import pdb
  6 | import copy
  7 | from tqdm import tqdm
  8 | import torch
  9 | from transformers import AutoTokenizer, PretrainedConfig, CLIPTextModel
 10 | from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
 11 | from diffusers.utils.peft_utils import set_weights_and_activate_adapters
 12 | from peft import LoraConfig
 13 | 
 14 | from pipelines.pix2pix.model import (
 15 |     make_1step_sched,
 16 |     my_vae_encoder_fwd,
 17 |     my_vae_decoder_fwd,
 18 | )
 19 | 
 20 | 
 21 | class TwinConv(torch.nn.Module):
 22 |     def __init__(self, convin_pretrained, convin_curr):
 23 |         super(TwinConv, self).__init__()
 24 |         self.conv_in_pretrained = copy.deepcopy(convin_pretrained)
 25 |         self.conv_in_curr = copy.deepcopy(convin_curr)
 26 |         self.r = None
 27 | 
 28 |     def forward(self, x):
 29 |         x1 = self.conv_in_pretrained(x).detach()
 30 |         x2 = self.conv_in_curr(x)
 31 |         return x1 * (1 - self.r) + x2 * (self.r)
 32 | 
 33 | 
 34 | class Pix2Pix_Turbo(torch.nn.Module):
 35 |     def __init__(self, name, ckpt_folder="checkpoints"):
 36 |         super().__init__()
 37 |         self.tokenizer = AutoTokenizer.from_pretrained(
 38 |             "stabilityai/sd-turbo", subfolder="tokenizer"
 39 |         )
 40 |         self.text_encoder = CLIPTextModel.from_pretrained(
 41 |             "stabilityai/sd-turbo", subfolder="text_encoder"
 42 |         ).cuda()
 43 |         self.sched = make_1step_sched()
 44 | 
 45 |         vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
 46 |         unet = UNet2DConditionModel.from_pretrained(
 47 |             "stabilityai/sd-turbo", subfolder="unet"
 48 |         )
 49 | 
 50 |         if name == "edge_to_image":
 51 |             url = "https://www.cs.cmu.edu/~img2img-turbo/models/edge_to_image_loras.pkl"
 52 |             os.makedirs(ckpt_folder, exist_ok=True)
 53 |             outf = os.path.join(ckpt_folder, "edge_to_image_loras.pkl")
 54 |             if not os.path.exists(outf):
 55 |                 print(f"Downloading checkpoint to {outf}")
 56 |                 response = requests.get(url, stream=True)
 57 |                 total_size_in_bytes = int(response.headers.get("content-length", 0))
 58 |                 block_size = 1024  # 1 Kibibyte
 59 |                 progress_bar = tqdm(
 60 |                     total=total_size_in_bytes, unit="iB", unit_scale=True
 61 |                 )
 62 |                 with open(outf, "wb") as file:
 63 |                     for data in response.iter_content(block_size):
 64 |                         progress_bar.update(len(data))
 65 |                         file.write(data)
 66 |                 progress_bar.close()
 67 |                 if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
 68 |                     print("ERROR, something went wrong")
 69 |                 print(f"Downloaded successfully to {outf}")
 70 |             p_ckpt = outf
 71 |             sd = torch.load(p_ckpt, map_location="cpu")
 72 |             unet_lora_config = LoraConfig(
 73 |                 r=sd["rank_unet"],
 74 |                 init_lora_weights="gaussian",
 75 |                 target_modules=sd["unet_lora_target_modules"],
 76 |             )
 77 | 
 78 |         if name == "sketch_to_image_stochastic":
 79 |             # download from url
 80 |             url = "https://www.cs.cmu.edu/~img2img-turbo/models/sketch_to_image_stochastic_lora.pkl"
 81 |             os.makedirs(ckpt_folder, exist_ok=True)
 82 |             outf = os.path.join(ckpt_folder, "sketch_to_image_stochastic_lora.pkl")
 83 |             if not os.path.exists(outf):
 84 |                 print(f"Downloading checkpoint to {outf}")
 85 |                 response = requests.get(url, stream=True)
 86 |                 total_size_in_bytes = int(response.headers.get("content-length", 0))
 87 |                 block_size = 1024  # 1 Kibibyte
 88 |                 progress_bar = tqdm(
 89 |                     total=total_size_in_bytes, unit="iB", unit_scale=True
 90 |                 )
 91 |                 with open(outf, "wb") as file:
 92 |                     for data in response.iter_content(block_size):
 93 |                         progress_bar.update(len(data))
 94 |                         file.write(data)
 95 |                 progress_bar.close()
 96 |                 if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
 97 |                     print("ERROR, something went wrong")
 98 |                 print(f"Downloaded successfully to {outf}")
 99 |             p_ckpt = outf
100 |             sd = torch.load(p_ckpt, map_location="cpu")
101 |             unet_lora_config = LoraConfig(
102 |                 r=sd["rank_unet"],
103 |                 init_lora_weights="gaussian",
104 |                 target_modules=sd["unet_lora_target_modules"],
105 |             )
106 |             convin_pretrained = copy.deepcopy(unet.conv_in)
107 |             unet.conv_in = TwinConv(convin_pretrained, unet.conv_in)
108 | 
109 |         vae.encoder.forward = my_vae_encoder_fwd.__get__(
110 |             vae.encoder, vae.encoder.__class__
111 |         )
112 |         vae.decoder.forward = my_vae_decoder_fwd.__get__(
113 |             vae.decoder, vae.decoder.__class__
114 |         )
115 |         # add the skip connection convs
116 |         vae.decoder.skip_conv_1 = torch.nn.Conv2d(
117 |             512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
118 |         ).cuda()
119 |         vae.decoder.skip_conv_2 = torch.nn.Conv2d(
120 |             256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
121 |         ).cuda()
122 |         vae.decoder.skip_conv_3 = torch.nn.Conv2d(
123 |             128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
124 |         ).cuda()
125 |         vae.decoder.skip_conv_4 = torch.nn.Conv2d(
126 |             128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
127 |         ).cuda()
128 |         vae_lora_config = LoraConfig(
129 |             r=sd["rank_vae"],
130 |             init_lora_weights="gaussian",
131 |             target_modules=sd["vae_lora_target_modules"],
132 |         )
133 |         vae.decoder.ignore_skip = False
134 |         vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
135 |         unet.add_adapter(unet_lora_config)
136 |         _sd_unet = unet.state_dict()
137 |         for k in sd["state_dict_unet"]:
138 |             _sd_unet[k] = sd["state_dict_unet"][k]
139 |         unet.load_state_dict(_sd_unet)
140 |         unet.enable_xformers_memory_efficient_attention()
141 |         _sd_vae = vae.state_dict()
142 |         for k in sd["state_dict_vae"]:
143 |             _sd_vae[k] = sd["state_dict_vae"][k]
144 |         vae.load_state_dict(_sd_vae)
145 |         unet.to("cuda")
146 |         vae.to("cuda")
147 |         unet.eval()
148 |         vae.eval()
149 |         self.unet, self.vae = unet, vae
150 |         self.vae.decoder.gamma = 1
151 |         self.timesteps = torch.tensor([999], device="cuda").long()
152 |         self.last_prompt = ""
153 |         self.caption_enc = None
154 |         self.device = "cuda"
155 | 
156 |     @torch.no_grad()
157 |     def forward(self, c_t, prompt, deterministic=True, r=1.0, noise_map=1.0):
158 |         # encode the text prompt
159 |         if prompt != self.last_prompt:
160 |             caption_tokens = self.tokenizer(
161 |                 prompt,
162 |                 max_length=self.tokenizer.model_max_length,
163 |                 padding="max_length",
164 |                 truncation=True,
165 |                 return_tensors="pt",
166 |             ).input_ids.cuda()
167 |             caption_enc = self.text_encoder(caption_tokens)[0]
168 |             self.caption_enc = caption_enc
169 |             self.last_prompt = prompt
170 | 
171 |         if deterministic:
172 |             encoded_control = (
173 |                 self.vae.encode(c_t).latent_dist.sample()
174 |                 * self.vae.config.scaling_factor
175 |             )
176 |             model_pred = self.unet(
177 |                 encoded_control,
178 |                 self.timesteps,
179 |                 encoder_hidden_states=self.caption_enc,
180 |             ).sample
181 |             x_denoised = self.sched.step(
182 |                 model_pred, self.timesteps, encoded_control, return_dict=True
183 |             ).prev_sample
184 |             self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
185 |             output_image = (
186 |                 self.vae.decode(x_denoised / self.vae.config.scaling_factor).sample
187 |             ).clamp(-1, 1)
188 |         else:
189 |             # scale the lora weights based on the r value
190 |             self.unet.set_adapters(["default"], weights=[r])
191 |             set_weights_and_activate_adapters(self.vae, ["vae_skip"], [r])
192 |             encoded_control = (
193 |                 self.vae.encode(c_t).latent_dist.sample()
194 |                 * self.vae.config.scaling_factor
195 |             )
196 |             # combine the input and noise
197 |             unet_input = encoded_control * r + noise_map * (1 - r)
198 |             self.unet.conv_in.r = r
199 |             unet_output = self.unet(
200 |                 unet_input,
201 |                 self.timesteps,
202 |                 encoder_hidden_states=self.caption_enc,
203 |             ).sample
204 |             self.unet.conv_in.r = None
205 |             x_denoised = self.sched.step(
206 |                 unet_output, self.timesteps, unet_input, return_dict=True
207 |             ).prev_sample
208 |             self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
209 |             self.vae.decoder.gamma = r
210 |             output_image = (
211 |                 self.vae.decode(x_denoised / self.vae.config.scaling_factor).sample
212 |             ).clamp(-1, 1)
213 |         return output_image
214 | 


--------------------------------------------------------------------------------
/server/pipelines/pix2pixTurbo.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torchvision import transforms
  3 | 
  4 | from config import Args
  5 | from pydantic import BaseModel, Field
  6 | from util import ParamsModel
  7 | from PIL import Image
  8 | from pipelines.pix2pix.pix2pix_turbo import Pix2Pix_Turbo
  9 | from pipelines.utils.canny_gpu import ScharrOperator
 10 | 
 11 | default_prompt = "close-up photo of the joker"
 12 | page_content = """
 13 | <h1 class="text-3xl font-bold">Real-Time pix2pix_turbo</h1>
 14 | <h3 class="text-xl font-bold">pix2pix turbo</h3>
 15 | <p class="text-sm">
 16 |     This demo showcases
 17 |     <a
 18 |     href="https://github.com/GaParmar/img2img-turbo"
 19 |     target="_blank"
 20 |     class="text-blue-500 underline hover:no-underline">One-Step Image Translation with Text-to-Image Models
 21 |     </a>
 22 | </p>
 23 | <p class="text-sm text-gray-500">
 24 |     Web app <a href="https://github.com/radames/Real-Time-Latent-Consistency-Model" target="_blank" class="text-blue-500 underline hover:no-underline">
 25 |     Real-Time Latent Consistency Models
 26 |     </a>
 27 | </p>
 28 | """
 29 | 
 30 | 
 31 | class Pipeline:
 32 |     class Info(BaseModel):
 33 |         name: str = "img2img"
 34 |         title: str = "Image-to-Image SDXL"
 35 |         description: str = "Generates an image from a text prompt"
 36 |         input_mode: str = "image"
 37 |         page_content: str = page_content
 38 | 
 39 |     class InputParams(ParamsModel):
 40 |         prompt: str = Field(
 41 |             default_prompt,
 42 |             title="Prompt",
 43 |             field="textarea",
 44 |             id="prompt",
 45 |         )
 46 | 
 47 |         width: int = Field(
 48 |             512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 49 |         )
 50 |         height: int = Field(
 51 |             512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 52 |         )
 53 |         seed: int = Field(
 54 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 55 |         )
 56 |         noise_r: float = Field(
 57 |             1.0,
 58 |             min=0.01,
 59 |             max=3.0,
 60 |             step=0.001,
 61 |             title="Noise R",
 62 |             field="range",
 63 |             hide=True,
 64 |             id="noise_r",
 65 |         )
 66 | 
 67 |         deterministic: bool = Field(
 68 |             True,
 69 |             hide=True,
 70 |             title="Deterministic",
 71 |             field="checkbox",
 72 |             id="deterministic",
 73 |         )
 74 |         canny_low_threshold: float = Field(
 75 |             0.0,
 76 |             min=0,
 77 |             max=1.0,
 78 |             step=0.001,
 79 |             title="Canny Low Threshold",
 80 |             field="range",
 81 |             hide=True,
 82 |             id="canny_low_threshold",
 83 |         )
 84 |         canny_high_threshold: float = Field(
 85 |             1.0,
 86 |             min=0,
 87 |             max=1.0,
 88 |             step=0.001,
 89 |             title="Canny High Threshold",
 90 |             field="range",
 91 |             hide=True,
 92 |             id="canny_high_threshold",
 93 |         )
 94 |         debug_canny: bool = Field(
 95 |             False,
 96 |             title="Debug Canny",
 97 |             field="checkbox",
 98 |             hide=True,
 99 |             id="debug_canny",
100 |         )
101 | 
102 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
103 |         self.model = Pix2Pix_Turbo("edge_to_image")
104 |         self.canny_torch = ScharrOperator(device=device)
105 |         self.device = device
106 |         self.last_time = 0.0
107 | 
108 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
109 |         canny_pil, canny_tensor = self.canny_torch(
110 |             params.image,
111 |             params.canny_low_threshold,
112 |             params.canny_high_threshold,
113 |             output_type="pil,tensor",
114 |         )
115 |         torch.manual_seed(params.seed)
116 |         noise = torch.randn(
117 |             (1, 4, params.width // 8, params.height // 8), device=self.device
118 |         )
119 |         canny_tensor = torch.cat((canny_tensor, canny_tensor, canny_tensor), dim=1)
120 |         output_image = self.model(
121 |             canny_tensor,
122 |             params.prompt,
123 |             params.deterministic,
124 |             params.noise_r,
125 |             noise,
126 |         )
127 |         output_pil = transforms.ToPILImage()(output_image[0].cpu() * 0.5 + 0.5)
128 | 
129 |         result_image = output_pil
130 |         if params.debug_canny:
131 |             # paste control_image on top of result_image
132 |             w0, h0 = (200, 200)
133 |             control_image = canny_pil.resize((w0, h0))
134 |             w1, h1 = result_image.size
135 |             result_image.paste(control_image, (w1 - w0, h1 - h0))
136 |         return result_image
137 | 


--------------------------------------------------------------------------------
/server/pipelines/txt2img.py:
--------------------------------------------------------------------------------
  1 | from diffusers import DiffusionPipeline, AutoencoderTiny
  2 | from compel import Compel
  3 | import torch
  4 | 
  5 | try:
  6 |     import intel_extension_for_pytorch as ipex  # type: ignore
  7 | except:
  8 |     pass
  9 | 
 10 | from config import Args
 11 | from pydantic import BaseModel, Field
 12 | from util import ParamsModel
 13 | from PIL import Image
 14 | from typing import List
 15 | from pruna import SmashConfig, smash
 16 | 
 17 | base_model = "SimianLuo/LCM_Dreamshaper_v7"
 18 | taesd_model = "madebyollin/taesd"
 19 | 
 20 | default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
 21 | 
 22 | page_content = """<h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
 23 | <h3 class="text-xl font-bold">Text-to-Image</h3>
 24 | <p class="text-sm">
 25 |     This demo showcases
 26 |     <a
 27 |     href="https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7"
 28 |     target="_blank"
 29 |     class="text-blue-500 underline hover:no-underline">LCM</a>
 30 | Image to Image pipeline using
 31 |     <a
 32 |     href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
 33 |     target="_blank"
 34 |     class="text-blue-500 underline hover:no-underline">Diffusers</a> with a MJPEG stream server
 35 | </p>
 36 | <p class="text-sm text-gray-500">
 37 |     Change the prompt to generate different images, accepts <a
 38 |     href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
 39 |     target="_blank"
 40 |     class="text-blue-500 underline hover:no-underline">Compel</a
 41 |     > syntax.
 42 | </p>"""
 43 | 
 44 | 
 45 | class Pipeline:
 46 |     class Info(BaseModel):
 47 |         name: str = "txt2img"
 48 |         title: str = "Text-to-Image LCM"
 49 |         description: str = "Generates an image from a text prompt"
 50 |         input_mode: str = "text"
 51 |         page_content: str = page_content
 52 | 
 53 |     class InputParams(ParamsModel):
 54 |         prompt: str = Field(
 55 |             default_prompt,
 56 |             title="Prompt",
 57 |             field="textarea",
 58 |             id="prompt",
 59 |         )
 60 |         seed: int = Field(
 61 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 62 |         )
 63 |         steps: int = Field(
 64 |             4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
 65 |         )
 66 |         width: int = Field(
 67 |             768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 68 |         )
 69 |         height: int = Field(
 70 |             768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 71 |         )
 72 |         guidance_scale: float = Field(
 73 |             8.0,
 74 |             min=1,
 75 |             max=30,
 76 |             step=0.001,
 77 |             title="Guidance Scale",
 78 |             field="range",
 79 |             hide=True,
 80 |             id="guidance_scale",
 81 |         )
 82 | 
 83 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
 84 |         self.pipe = DiffusionPipeline.from_pretrained(base_model, safety_checker=None)
 85 |         if args.taesd:
 86 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
 87 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
 88 |             ).to(device)
 89 | 
 90 |         if args.pruna:
 91 |             # Create and smash your model
 92 |             smash_config = SmashConfig()
 93 |             # smash_config["cacher"] = "deepcache"
 94 |             smash_config["compiler"] = "stable_fast"
 95 |             self.pipe = smash(model=self.pipe, smash_config=smash_config)
 96 | 
 97 |         if args.sfast:
 98 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
 99 |                 compile,
100 |                 CompilationConfig,
101 |             )
102 | 
103 |             config = CompilationConfig.Default()
104 |             config.enable_xformers = True
105 |             config.enable_triton = True
106 |             config.enable_cuda_graph = True
107 |             self.pipe = compile(self.pipe, config=config)
108 | 
109 |         self.pipe.set_progress_bar_config(disable=True)
110 |         self.pipe.to(device=device, dtype=torch_dtype)
111 |         if device.type != "mps":
112 |             self.pipe.unet.to(memory_format=torch.channels_last)
113 | 
114 |         if args.torch_compile:
115 |             self.pipe.unet = torch.compile(
116 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
117 |             )
118 |             self.pipe.vae = torch.compile(
119 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
120 |             )
121 | 
122 |             self.pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
123 | 
124 |         if args.compel:
125 |             self.compel_proc = Compel(
126 |                 tokenizer=self.pipe.tokenizer,
127 |                 text_encoder=self.pipe.text_encoder,
128 |                 truncate_long_prompts=False,
129 |             )
130 | 
131 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
132 |         generator = torch.manual_seed(params.seed)
133 |         prompt_embeds = None
134 |         prompt = params.prompt
135 |         if hasattr(self, "compel_proc"):
136 |             prompt_embeds = self.compel_proc(params.prompt)
137 |             prompt = None
138 | 
139 |         results = self.pipe(
140 |             prompt_embeds=prompt_embeds,
141 |             prompt=prompt,
142 |             generator=generator,
143 |             num_inference_steps=params.steps,
144 |             guidance_scale=params.guidance_scale,
145 |             width=params.width,
146 |             height=params.height,
147 |             output_type="pil",
148 |         )
149 | 
150 |         return results.images[0]
151 | 


--------------------------------------------------------------------------------
/server/pipelines/txt2imgLora.py:
--------------------------------------------------------------------------------
  1 | from diffusers import DiffusionPipeline, AutoencoderTiny, LCMScheduler
  2 | from compel import Compel
  3 | import torch
  4 | 
  5 | try:
  6 |     import intel_extension_for_pytorch as ipex  # type: ignore
  7 | except:
  8 |     pass
  9 | 
 10 | import psutil
 11 | from config import Args
 12 | from pydantic import BaseModel, Field
 13 | from util import ParamsModel
 14 | from PIL import Image
 15 | from pruna import SmashConfig, smash
 16 | 
 17 | base_model = "wavymulder/Analog-Diffusion"
 18 | lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
 19 | taesd_model = "madebyollin/taesd"
 20 | 
 21 | default_prompt = "Analog style photograph of young Harrison Ford as Han Solo, star wars behind the scenes"
 22 | 
 23 | page_content = """
 24 | <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model SDv1.5</h1>
 25 | <h3 class="text-xl font-bold">Text-to-Image LCM + LoRa</h3>
 26 | <p class="text-sm">
 27 |     This demo showcases
 28 |     <a
 29 |     href="https://huggingface.co/blog/lcm_lora"
 30 |     target="_blank"
 31 |     class="text-blue-500 underline hover:no-underline">LCM</a>
 32 | Image to Image pipeline using
 33 |     <a
 34 |     href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
 35 |     target="_blank"
 36 |     class="text-blue-500 underline hover:no-underline">Diffusers</a
 37 |     > with a MJPEG stream server. Featuring <a
 38 |     href="https://huggingface.co/wavymulder/Analog-Diffusion"
 39 |     target="_blank"
 40 |     class="text-blue-500 underline hover:no-underline">Analog-Diffusion</a>
 41 | </p>
 42 | <p class="text-sm text-gray-500">
 43 |     Change the prompt to generate different images, accepts <a
 44 |     href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
 45 |     target="_blank"
 46 |     class="text-blue-500 underline hover:no-underline">Compel</a
 47 |     > syntax.
 48 | </p>
 49 | """
 50 | 
 51 | 
 52 | class Pipeline:
 53 |     class Info(BaseModel):
 54 |         name: str = "controlnet"
 55 |         title: str = "Text-to-Image LCM + LoRa"
 56 |         description: str = "Generates an image from a text prompt"
 57 |         input_mode: str = "text"
 58 |         page_content: str = page_content
 59 | 
 60 |     class InputParams(ParamsModel):
 61 |         prompt: str = Field(
 62 |             default_prompt,
 63 |             title="Prompt",
 64 |             field="textarea",
 65 |             id="prompt",
 66 |         )
 67 |         seed: int = Field(
 68 |             8638236174640251, min=0, title="Seed", field="seed", hide=True, id="seed"
 69 |         )
 70 |         steps: int = Field(
 71 |             4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
 72 |         )
 73 |         width: int = Field(
 74 |             512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 75 |         )
 76 |         height: int = Field(
 77 |             512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 78 |         )
 79 |         guidance_scale: float = Field(
 80 |             0.2,
 81 |             min=0,
 82 |             max=4,
 83 |             step=0.001,
 84 |             title="Guidance Scale",
 85 |             field="range",
 86 |             hide=True,
 87 |             id="guidance_scale",
 88 |         )
 89 | 
 90 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
 91 |         self.pipe = DiffusionPipeline.from_pretrained(base_model, safety_checker=None)
 92 |         if args.taesd:
 93 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
 94 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
 95 |             ).to(device)
 96 | 
 97 |         if args.pruna:
 98 |             # Create and smash your model
 99 |             smash_config = SmashConfig()
100 |             # smash_config["cacher"] = "deepcache"
101 |             smash_config["compiler"] = "stable_fast"
102 |             self.pipe = smash(model=self.pipe, smash_config=smash_config)
103 | 
104 |         self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
105 |         self.pipe.set_progress_bar_config(disable=True)
106 |         self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
107 |         self.pipe.to(device=device, dtype=torch_dtype)
108 | 
109 |         if device.type != "mps":
110 |             self.pipe.unet.to(memory_format=torch.channels_last)
111 | 
112 |         if args.torch_compile:
113 |             self.pipe.unet = torch.compile(
114 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
115 |             )
116 |             self.pipe.vae = torch.compile(
117 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
118 |             )
119 | 
120 |             self.pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
121 | 
122 |         if args.sfast:
123 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
124 |                 compile,
125 |                 CompilationConfig,
126 |             )
127 | 
128 |             config = CompilationConfig.Default()
129 |             config.enable_xformers = True
130 |             config.enable_triton = True
131 |             config.enable_cuda_graph = True
132 |             self.pipe = compile(self.pipe, config=config)
133 | 
134 |         if args.compel:
135 |             self.compel_proc = Compel(
136 |                 tokenizer=self.pipe.tokenizer,
137 |                 text_encoder=self.pipe.text_encoder,
138 |                 truncate_long_prompts=False,
139 |             )
140 | 
141 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
142 |         generator = torch.manual_seed(params.seed)
143 |         prompt_embeds = None
144 |         prompt = params.prompt
145 |         if hasattr(self, "compel_proc"):
146 |             prompt_embeds = self.compel_proc(params.prompt)
147 |             prompt = None
148 | 
149 |         results = self.pipe(
150 |             prompt=prompt,
151 |             prompt_embeds=prompt_embeds,
152 |             generator=generator,
153 |             num_inference_steps=params.steps,
154 |             guidance_scale=params.guidance_scale,
155 |             width=params.width,
156 |             height=params.height,
157 |             output_type="pil",
158 |         )
159 | 
160 |         return results.images[0]
161 | 


--------------------------------------------------------------------------------
/server/pipelines/txt2imgLoraSDXL.py:
--------------------------------------------------------------------------------
  1 | from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderKL, AutoencoderTiny
  2 | from compel import Compel, ReturnedEmbeddingsType
  3 | import torch
  4 | 
  5 | try:
  6 |     import intel_extension_for_pytorch as ipex  # type: ignore
  7 | except:
  8 |     pass
  9 | 
 10 | import psutil
 11 | from config import Args
 12 | from pydantic import BaseModel, Field
 13 | from util import ParamsModel
 14 | from PIL import Image
 15 | 
 16 | model_id = "stabilityai/stable-diffusion-xl-base-1.0"
 17 | lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
 18 | taesd_model = "madebyollin/taesdxl"
 19 | 
 20 | 
 21 | default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
 22 | default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
 23 | page_content = """
 24 | <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
 25 | <h3 class="text-xl font-bold">Text-to-Image SDXL + LCM + LoRA</h3>
 26 | <p class="text-sm">
 27 |     This demo showcases
 28 |     <a
 29 |     href="https://huggingface.co/blog/lcm_lora"
 30 |     target="_blank"
 31 |     class="text-blue-500 underline hover:no-underline">LCM LoRA</a
 32 |     >
 33 |     Text to Image pipeline using
 34 |     <a
 35 |     href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
 36 |     target="_blank"
 37 |     class="text-blue-500 underline hover:no-underline">Diffusers</a
 38 |     > with a MJPEG stream server.
 39 | </p>
 40 | <p class="text-sm text-gray-500">
 41 |     Change the prompt to generate different images, accepts <a
 42 |     href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
 43 |     target="_blank"
 44 |     class="text-blue-500 underline hover:no-underline">Compel</a
 45 |     > syntax.
 46 | </p>
 47 | """
 48 | 
 49 | 
 50 | class Pipeline:
 51 |     class Info(BaseModel):
 52 |         name: str = "LCM+Lora+SDXL"
 53 |         title: str = "Text-to-Image SDXL + LCM + LoRA"
 54 |         description: str = "Generates an image from a text prompt"
 55 |         page_content: str = page_content
 56 |         input_mode: str = "text"
 57 | 
 58 |     class InputParams(ParamsModel):
 59 |         prompt: str = Field(
 60 |             default_prompt,
 61 |             title="Prompt",
 62 |             field="textarea",
 63 |             id="prompt",
 64 |         )
 65 |         negative_prompt: str = Field(
 66 |             default_negative_prompt,
 67 |             title="Negative Prompt",
 68 |             field="textarea",
 69 |             id="negative_prompt",
 70 |             hide=True,
 71 |         )
 72 |         seed: int = Field(
 73 |             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
 74 |         )
 75 |         steps: int = Field(
 76 |             4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
 77 |         )
 78 |         width: int = Field(
 79 |             1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 80 |         )
 81 |         height: int = Field(
 82 |             1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
 83 |         )
 84 |         guidance_scale: float = Field(
 85 |             1.0,
 86 |             min=0,
 87 |             max=20,
 88 |             step=0.001,
 89 |             title="Guidance Scale",
 90 |             field="range",
 91 |             hide=True,
 92 |             id="guidance_scale",
 93 |         )
 94 | 
 95 |     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
 96 |         vae = AutoencoderKL.from_pretrained(
 97 |             "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
 98 |         )
 99 | 
100 |         self.pipe = DiffusionPipeline.from_pretrained(
101 |             model_id,
102 |             safety_checker=None,
103 |             vae=vae,
104 |         )
105 |         # Load LCM LoRA
106 |         self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
107 |         self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
108 |         self.pipe.set_progress_bar_config(disable=True)
109 |         self.pipe.to(device=device, dtype=torch_dtype).to(device)
110 | 
111 |         if args.sfast:
112 |             from sfast.compilers.stable_diffusion_pipeline_compiler import (
113 |                 compile,
114 |                 CompilationConfig,
115 |             )
116 | 
117 |             config = CompilationConfig.Default()
118 |             config.enable_xformers = True
119 |             config.enable_triton = True
120 |             config.enable_cuda_graph = True
121 |             self.pipe = compile(self.pipe, config=config)
122 | 
123 |         if device.type != "mps":
124 |             self.pipe.unet.to(memory_format=torch.channels_last)
125 | 
126 |         self.pipe.compel_proc = Compel(
127 |             tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
128 |             text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
129 |             returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
130 |             requires_pooled=[False, True],
131 |         )
132 |         if args.taesd:
133 |             self.pipe.vae = AutoencoderTiny.from_pretrained(
134 |                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
135 |             ).to(device)
136 | 
137 |         if args.torch_compile:
138 |             self.pipe.unet = torch.compile(
139 |                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
140 |             )
141 |             self.pipe.vae = torch.compile(
142 |                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
143 |             )
144 |             self.pipe(
145 |                 prompt="warmup",
146 |             )
147 | 
148 |     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
149 |         generator = torch.manual_seed(params.seed)
150 | 
151 |         prompt = params.prompt
152 |         negative_prompt = params.negative_prompt
153 |         prompt_embeds = None
154 |         pooled_prompt_embeds = None
155 |         negative_prompt_embeds = None
156 |         negative_pooled_prompt_embeds = None
157 |         if hasattr(self.pipe, "compel_proc"):
158 |             _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
159 |                 [params.prompt, params.negative_prompt]
160 |             )
161 |             prompt = None
162 |             negative_prompt = None
163 |             prompt_embeds = _prompt_embeds[0:1]
164 |             pooled_prompt_embeds = pooled_prompt_embeds[0:1]
165 |             negative_prompt_embeds = _prompt_embeds[1:2]
166 |             negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
167 | 
168 |         results = self.pipe(
169 |             prompt=prompt,
170 |             negative_prompt=negative_prompt,
171 |             prompt_embeds=prompt_embeds,
172 |             pooled_prompt_embeds=pooled_prompt_embeds,
173 |             negative_prompt_embeds=negative_prompt_embeds,
174 |             negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
175 |             generator=generator,
176 |             num_inference_steps=params.steps,
177 |             guidance_scale=params.guidance_scale,
178 |             width=params.width,
179 |             height=params.height,
180 |             output_type="pil",
181 |         )
182 | 
183 |         return results.images[0]
184 | 


--------------------------------------------------------------------------------
/server/pipelines/utils/canny_gpu.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torchvision.transforms import ToTensor, ToPILImage
  4 | from PIL import Image
  5 | 
  6 | 
  7 | class SobelOperator(nn.Module):
  8 |     SOBEL_KERNEL_X = torch.tensor(
  9 |         [[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]]
 10 |     )
 11 |     SOBEL_KERNEL_Y = torch.tensor(
 12 |         [[-1.0, -2.0, -1.0], [0.0, 0.0, 0.0], [1.0, 2.0, 1.0]]
 13 |     )
 14 | 
 15 |     def __init__(self, device="cuda"):
 16 |         super(SobelOperator, self).__init__()
 17 |         self.device = device
 18 |         self.edge_conv_x = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
 19 |             self.device
 20 |         )
 21 |         self.edge_conv_y = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
 22 |             self.device
 23 |         )
 24 |         self.edge_conv_x.weight = nn.Parameter(
 25 |             self.SOBEL_KERNEL_X.view((1, 1, 3, 3)).to(self.device)
 26 |         )
 27 |         self.edge_conv_y.weight = nn.Parameter(
 28 |             self.SOBEL_KERNEL_Y.view((1, 1, 3, 3)).to(self.device)
 29 |         )
 30 | 
 31 |     @torch.no_grad()
 32 |     def forward(
 33 |         self,
 34 |         image: Image.Image,
 35 |         low_threshold: float,
 36 |         high_threshold: float,
 37 |         output_type="pil",
 38 |     ) -> Image.Image | torch.Tensor | tuple[Image.Image, torch.Tensor]:
 39 |         # Convert PIL image to PyTorch tensor
 40 |         image_gray = image.convert("L")
 41 |         image_tensor = ToTensor()(image_gray).unsqueeze(0).to(self.device)
 42 | 
 43 |         # Compute gradients
 44 |         edge_x = self.edge_conv_x(image_tensor)
 45 |         edge_y = self.edge_conv_y(image_tensor)
 46 |         edge = torch.sqrt(torch.square(edge_x) + torch.square(edge_y))
 47 | 
 48 |         # Apply thresholding
 49 |         edge.div_(edge.max())  # Normalize to 0-1 (in-place operation)
 50 |         edge[edge >= high_threshold] = 1.0
 51 |         edge[edge <= low_threshold] = 0.0
 52 | 
 53 |         # Convert the result back to a PIL image
 54 |         if output_type == "pil":
 55 |             return ToPILImage()(edge.squeeze(0).cpu())
 56 |         elif output_type == "tensor":
 57 |             return edge
 58 |         elif output_type == "pil,tensor":
 59 |             return ToPILImage()(edge.squeeze(0).cpu()), edge
 60 | 
 61 | 
 62 | class ScharrOperator(nn.Module):
 63 |     SCHARR_KERNEL_X = torch.tensor(
 64 |         [[-3.0, 0.0, 3.0], [-10.0, 0.0, 10.0], [-3.0, 0.0, 3.0]]
 65 |     )
 66 |     SCHARR_KERNEL_Y = torch.tensor(
 67 |         [[-3.0, -10.0, -3.0], [0.0, 0.0, 0.0], [3.0, 10.0, 3.0]]
 68 |     )
 69 | 
 70 |     def __init__(self, device="cuda"):
 71 |         super(ScharrOperator, self).__init__()
 72 |         self.device = device
 73 |         self.edge_conv_x = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
 74 |             self.device
 75 |         )
 76 |         self.edge_conv_y = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
 77 |             self.device
 78 |         )
 79 |         self.edge_conv_x.weight = nn.Parameter(
 80 |             self.SCHARR_KERNEL_X.view((1, 1, 3, 3)).to(self.device)
 81 |         )
 82 |         self.edge_conv_y.weight = nn.Parameter(
 83 |             self.SCHARR_KERNEL_Y.view((1, 1, 3, 3)).to(self.device)
 84 |         )
 85 | 
 86 |     @torch.no_grad()
 87 |     def forward(
 88 |         self,
 89 |         image: Image.Image,
 90 |         low_threshold: float,
 91 |         high_threshold: float,
 92 |         output_type="pil",
 93 |         invert: bool = False,
 94 |     ) -> Image.Image | torch.Tensor | tuple[Image.Image, torch.Tensor]:
 95 |         # Convert PIL image to PyTorch tensor
 96 |         image_gray = image.convert("L")
 97 |         image_tensor = ToTensor()(image_gray).unsqueeze(0).to(self.device)
 98 | 
 99 |         # Compute gradients
100 |         edge_x = self.edge_conv_x(image_tensor)
101 |         edge_y = self.edge_conv_y(image_tensor)
102 |         edge = torch.abs(edge_x) + torch.abs(edge_y)
103 | 
104 |         # Apply thresholding
105 |         edge.div_(edge.max())  # Normalize to 0-1 (in-place operation)
106 |         edge[edge >= high_threshold] = 1.0
107 |         edge[edge <= low_threshold] = 0.0
108 |         if invert:
109 |             edge = 1 - edge
110 | 
111 |         # Convert the result back to a PIL image
112 |         if output_type == "pil":
113 |             return ToPILImage()(edge.squeeze(0).cpu())
114 |         elif output_type == "tensor":
115 |             return edge
116 |         elif output_type == "pil,tensor":
117 |             return ToPILImage()(edge.squeeze(0).cpu()), edge
118 | 


--------------------------------------------------------------------------------
/server/pipelines/utils/safety_checker.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import torch
 16 | import torch.nn as nn
 17 | from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel
 18 | from PIL import Image
 19 | 
 20 | 
 21 | def cosine_distance(image_embeds, text_embeds):
 22 |     normalized_image_embeds = nn.functional.normalize(image_embeds)
 23 |     normalized_text_embeds = nn.functional.normalize(text_embeds)
 24 |     return torch.mm(normalized_image_embeds, normalized_text_embeds.t())
 25 | 
 26 | 
 27 | class StableDiffusionSafetyChecker(PreTrainedModel):
 28 |     config_class = CLIPConfig
 29 | 
 30 |     _no_split_modules = ["CLIPEncoderLayer"]
 31 | 
 32 |     def __init__(self, config: CLIPConfig):
 33 |         super().__init__(config)
 34 | 
 35 |         self.vision_model = CLIPVisionModel(config.vision_config)
 36 |         self.visual_projection = nn.Linear(
 37 |             config.vision_config.hidden_size, config.projection_dim, bias=False
 38 |         )
 39 | 
 40 |         self.concept_embeds = nn.Parameter(
 41 |             torch.ones(17, config.projection_dim), requires_grad=False
 42 |         )
 43 |         self.special_care_embeds = nn.Parameter(
 44 |             torch.ones(3, config.projection_dim), requires_grad=False
 45 |         )
 46 | 
 47 |         self.concept_embeds_weights = nn.Parameter(torch.ones(17), requires_grad=False)
 48 |         self.special_care_embeds_weights = nn.Parameter(
 49 |             torch.ones(3), requires_grad=False
 50 |         )
 51 | 
 52 |     @torch.no_grad()
 53 |     def forward(self, clip_input, images):
 54 |         pooled_output = self.vision_model(clip_input)[1]  # pooled_output
 55 |         image_embeds = self.visual_projection(pooled_output)
 56 | 
 57 |         # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
 58 |         special_cos_dist = (
 59 |             cosine_distance(image_embeds, self.special_care_embeds)
 60 |             .cpu()
 61 |             .float()
 62 |             .numpy()
 63 |         )
 64 |         cos_dist = (
 65 |             cosine_distance(image_embeds, self.concept_embeds).cpu().float().numpy()
 66 |         )
 67 | 
 68 |         result = []
 69 |         batch_size = image_embeds.shape[0]
 70 |         for i in range(batch_size):
 71 |             result_img = {
 72 |                 "special_scores": {},
 73 |                 "special_care": [],
 74 |                 "concept_scores": {},
 75 |                 "bad_concepts": [],
 76 |             }
 77 | 
 78 |             # increase this value to create a stronger `nfsw` filter
 79 |             # at the cost of increasing the possibility of filtering benign images
 80 |             adjustment = 0.0
 81 | 
 82 |             for concept_idx in range(len(special_cos_dist[0])):
 83 |                 concept_cos = special_cos_dist[i][concept_idx]
 84 |                 concept_threshold = self.special_care_embeds_weights[concept_idx].item()
 85 |                 result_img["special_scores"][concept_idx] = round(
 86 |                     concept_cos - concept_threshold + adjustment, 3
 87 |                 )
 88 |                 if result_img["special_scores"][concept_idx] > 0:
 89 |                     result_img["special_care"].append(
 90 |                         {concept_idx, result_img["special_scores"][concept_idx]}
 91 |                     )
 92 |                     adjustment = 0.01
 93 | 
 94 |             for concept_idx in range(len(cos_dist[0])):
 95 |                 concept_cos = cos_dist[i][concept_idx]
 96 |                 concept_threshold = self.concept_embeds_weights[concept_idx].item()
 97 |                 result_img["concept_scores"][concept_idx] = round(
 98 |                     concept_cos - concept_threshold + adjustment, 3
 99 |                 )
100 |                 if result_img["concept_scores"][concept_idx] > 0:
101 |                     result_img["bad_concepts"].append(concept_idx)
102 | 
103 |             result.append(result_img)
104 | 
105 |         has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result]
106 | 
107 |         return has_nsfw_concepts
108 | 
109 |     @torch.no_grad()
110 |     def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor):
111 |         pooled_output = self.vision_model(clip_input)[1]  # pooled_output
112 |         image_embeds = self.visual_projection(pooled_output)
113 | 
114 |         special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds)
115 |         cos_dist = cosine_distance(image_embeds, self.concept_embeds)
116 | 
117 |         # increase this value to create a stronger `nsfw` filter
118 |         # at the cost of increasing the possibility of filtering benign images
119 |         adjustment = 0.0
120 | 
121 |         special_scores = (
122 |             special_cos_dist - self.special_care_embeds_weights + adjustment
123 |         )
124 |         # special_scores = special_scores.round(decimals=3)
125 |         special_care = torch.any(special_scores > 0, dim=1)
126 |         special_adjustment = special_care * 0.01
127 |         special_adjustment = special_adjustment.unsqueeze(1).expand(
128 |             -1, cos_dist.shape[1]
129 |         )
130 | 
131 |         concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment
132 |         # concept_scores = concept_scores.round(decimals=3)
133 |         has_nsfw_concepts = torch.any(concept_scores > 0, dim=1)
134 | 
135 |         images[has_nsfw_concepts] = 0.0  # black image
136 | 
137 |         return images, has_nsfw_concepts
138 | 
139 | 
140 | class SafetyChecker:
141 |     def __init__(self, device="cuda"):
142 |         from transformers import CLIPFeatureExtractor
143 | 
144 |         self.device = device
145 |         self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
146 |             "CompVis/stable-diffusion-safety-checker"
147 |         ).to(device)
148 |         self.feature_extractor = CLIPFeatureExtractor.from_pretrained(
149 |             "openai/clip-vit-base-patch32"
150 |         )
151 | 
152 |     def __call__(
153 |         self, images: list[Image.Image] | Image.Image
154 |     ) -> tuple[list[Image.Image], list[bool]] | tuple[Image.Image, bool]:
155 |         images_list = [images] if isinstance(images, Image.Image) else images
156 | 
157 |         safety_checker_input = self.feature_extractor(
158 |             images_list, return_tensors="pt"
159 |         ).to(self.device)
160 | 
161 |         has_nsfw_concepts = self.safety_checker(
162 |             images=[images_list],
163 |             clip_input=safety_checker_input.pixel_values.to(self.device),
164 |         )
165 | 
166 |         if isinstance(images, Image.Image):
167 |             return images, has_nsfw_concepts[0]
168 | 
169 |         return images, has_nsfw_concepts
170 | 


--------------------------------------------------------------------------------
/server/requirements.txt:
--------------------------------------------------------------------------------
 1 | # Use with: uv pip install --no-cache --system --index-strategy=unsafe-best-match -r requirements.txt
 2 | numpy
 3 | diffusers<=0.33.1
 4 | llvmlite>=0.39.0
 5 | numba>=0.56.0
 6 | transformers
 7 | pydantic
 8 | huggingface-hub
 9 | hf_transfer
10 | fastapi
11 | uvicorn[standard]
12 | Pillow==11.0.0
13 | accelerate
14 | compel==2.0.2
15 | controlnet-aux==0.0.9
16 | peft
17 | markdown2
18 | safetensors
19 | setuptools
20 | mpmath
21 | controlnet-aux
22 | sentencepiece==0.2.0
23 | optimum-quanto # has to be optimum-quanto==0.2.5 for pruna int4
24 | gguf
25 | types-Pillow
26 | mypy
27 | python-dotenv
28 | requests>=2.31.0  # Added explicitly to resolve dependency conflict
29 | 
30 | --extra-index-url https://download.pytorch.org/whl/cu118
31 | torch==2.5.1
32 | torchvision
33 | torchaudio
34 | xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
35 | pruna[stable-fast] ; sys_platform != 'darwin' or platform_machine != 'arm64'
36 | 
37 | # stable_fast @ https://github.com/chengzeyi/stable-fast/releases/download/nightly/stable_fast-1.0.5.dev20241127+torch230cu121-cp310-cp310-manylinux2014_x86_64.whl ; sys_platform != 'darwin' or platform_machine != 'arm64'
38 | #oneflow @ https://github.com/siliconflow/oneflow_releases/releases/download/community_cu121/oneflow-0.9.1.dev20241114%2Bcu121-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl ; sys_platform != 'darwin' or platform_machine != 'arm64'
39 | #onediff @ git+https://github.com/siliconflow/onediff.git@main#egg=onediff ; sys_platform != 'darwin' or platform_machine != 'arm64'


--------------------------------------------------------------------------------
/server/util.py:
--------------------------------------------------------------------------------
 1 | from importlib import import_module
 2 | from typing import Any, TypeVar
 3 | from PIL import Image
 4 | import io
 5 | from pydantic import BaseModel
 6 | 
 7 | 
 8 | # Used only for type checking the pipeline class
 9 | TPipeline = TypeVar("TPipeline", bound=type[Any])
10 | 
11 | 
12 | class ParamsModel(BaseModel):
13 |     """Base model for pipeline parameters."""
14 | 
15 |     model_config = {
16 |         "arbitrary_types_allowed": True,
17 |         "extra": "allow",  # Allow extra attributes for dynamic fields like 'image'
18 |     }
19 | 
20 |     @classmethod
21 |     def from_dict(cls, data: dict[str, Any]) -> "ParamsModel":
22 |         """Create a model instance from dictionary data."""
23 |         return cls.model_validate(data)
24 | 
25 |     def to_dict(self) -> dict[str, Any]:
26 |         """Convert model to dictionary."""
27 |         return self.model_dump()
28 | 
29 | 
30 | def get_pipeline_class(pipeline_name: str) -> type:
31 |     """
32 |     Dynamically imports and returns the Pipeline class from a specified module.
33 | 
34 |     Args:
35 |         pipeline_name: The name of the pipeline module to import
36 | 
37 |     Returns:
38 |         The Pipeline class from the specified module
39 | 
40 |     Raises:
41 |         ValueError: If the module or Pipeline class isn't found
42 |         TypeError: If Pipeline is not a class
43 |     """
44 |     try:
45 |         module = import_module(f"pipelines.{pipeline_name}")
46 |     except ModuleNotFoundError:
47 |         raise ValueError(f"Pipeline {pipeline_name} module not found")
48 | 
49 |     pipeline_class = getattr(module, "Pipeline", None)
50 | 
51 |     if pipeline_class is None:
52 |         raise ValueError(f"'Pipeline' class not found in module '{pipeline_name}'.")
53 | 
54 |     # Type check to ensure we're returning a class
55 |     if not isinstance(pipeline_class, type):
56 |         raise TypeError(f"'Pipeline' in module '{pipeline_name}' is not a class")
57 | 
58 |     return pipeline_class
59 | 
60 | 
61 | def bytes_to_pil(image_bytes: bytes) -> Image.Image:
62 |     image = Image.open(io.BytesIO(image_bytes))
63 |     return image
64 | 
65 | 
66 | def pil_to_frame(image: Image.Image) -> bytes:
67 |     frame_data = io.BytesIO()
68 |     image.save(frame_data, format="JPEG", quality=80, optimize=True, progressive=True)
69 |     frame_data = frame_data.getvalue()
70 |     return (
71 |         b"--frame\r\n"
72 |         + b"Content-Type: image/jpeg\r\n"
73 |         + f"Content-Length: {len(frame_data)}\r\n\r\n".encode()
74 |         + frame_data
75 |         + b"\r\n"
76 |     )
77 | 
78 | 
79 | def is_firefox(user_agent: str) -> bool:
80 |     return "Firefox" in user_agent
81 | 


--------------------------------------------------------------------------------