├── .gitignore ├── .gitmodules ├── .vscode └── settings.json ├── Dockerfile ├── Dockerfile.cuda118 ├── Dockerfile.protoc ├── LICENSE ├── README.md ├── build.sh ├── client.py ├── docker_support ├── CMakeFile.txt.diff ├── build_protoc.sh ├── cpp_extension.py └── run.sh ├── environment.yaml ├── nonfree ├── README.md ├── tome_memory_efficient_cross_attention.py ├── tome_patcher.py └── tome_unet.py ├── pyproject.toml ├── requirements.txt ├── sdgrpcserver ├── __init__.py ├── config │ ├── dist_hashes │ ├── engines.yaml │ └── genhashes.sh ├── constants.py ├── debug_recorder.py ├── generated │ ├── __init__.py │ ├── dashboard_pb2.py │ ├── dashboard_pb2_grpc.py │ ├── engines_pb2.py │ ├── engines_pb2_grpc.py │ ├── generation_pb2.py │ ├── generation_pb2_grpc.py │ ├── tensors_pb2.py │ └── tensors_pb2_grpc.py ├── images.py ├── k_diffusion.py ├── manager.py ├── patching.py ├── pipeline │ ├── __init__.py │ ├── attention_replacer.py │ ├── common_scheduler.py │ ├── diffusers_types.py │ ├── easing.py │ ├── kschedulers │ │ ├── __init__.py │ │ ├── scheduling_dpm2_ancestral_discrete.py │ │ ├── scheduling_dpm2_discrete.py │ │ ├── scheduling_euler_ancestral_discrete.py │ │ ├── scheduling_euler_discrete.py │ │ ├── scheduling_heun_discrete.py │ │ └── scheduling_utils.py │ ├── latent_debugger.py │ ├── model_utils.py │ ├── models │ │ ├── __init__.py │ │ ├── memory_efficient_cross_attention.py │ │ └── structured_cross_attention.py │ ├── randtools.py │ ├── safety_checkers.py │ ├── schedulers │ │ ├── sample_dpmpp_2m.py │ │ └── scheduling_ddim.py │ ├── text_embedding │ │ ├── __init__.py │ │ ├── basic_text_embedding.py │ │ ├── lpw_text_embedding.py │ │ ├── structured_text_embedding.py │ │ ├── text_embedding.py │ │ └── text_encoder_alt_layer.py │ ├── unet │ │ ├── cfg.py │ │ ├── clipguided.py │ │ ├── core.py │ │ ├── graft.py │ │ ├── hires_fix.py │ │ ├── hires_fix_other.py │ │ ├── hires_fix_resize.py │ │ └── types.py │ ├── unified_pipeline.py │ ├── upscaler_pipeline.py │ ├── vae_approximator.py │ └── xformers_utils.py ├── ram_monitor.py ├── resize_right.py ├── server.py ├── services │ ├── __init__.py │ ├── dashboard.py │ ├── engines.py │ └── generate.py ├── sonora │ ├── __init__.py │ ├── __version__.py │ ├── aio.py │ ├── asgi.py │ ├── client.py │ ├── echotest.py │ ├── protocol.py │ └── wsgi.py ├── src │ └── __init__.py └── utils.py ├── server.py ├── stablecabal.png ├── tests ├── VRAMUsageMonitor.py ├── batch_independance.py ├── engines.basic.yaml ├── engines.clip.yaml ├── engines.inpaint.yaml ├── engines.sd14.yaml ├── graftedpaint.engine.yaml ├── graftedpaint.image_512_512.png ├── graftedpaint.image_768_600.png ├── graftedpaint.image_768_768.png ├── graftedpaint.py ├── happy_path.engines.yaml ├── happy_path.image_512.png ├── happy_path.image_768.png ├── happy_path.py ├── image.png ├── mask.png ├── prompt_weights.engine.yaml ├── prompt_weights.py ├── schedulers.py └── test_harness.py └── weights └── .gitkeep /.gitignore: -------------------------------------------------------------------------------- 1 | # Initially taken from Github's Python gitignore file 2 | _[A-Za-z]* 3 | weights/* 4 | src/* 5 | tests/out/* 6 | /config.yaml 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # tests and logs 17 | tests/fixtures/cached_*_text.txt 18 | logs/ 19 | lightning_logs/ 20 | lang_code_data/ 21 | 22 | # Distribution / packaging 23 | .Python 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | 
parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .nox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # celery beat schedule file 97 | celerybeat-schedule 98 | 99 | # SageMath parsed files 100 | *.sage.py 101 | 102 | # Environments 103 | .env 104 | .venv 105 | env/ 106 | venv/ 107 | ENV/ 108 | env.bak/ 109 | venv.bak/ 110 | 111 | # Spyder project settings 112 | .spyderproject 113 | .spyproject 114 | 115 | # Rope project settings 116 | .ropeproject 117 | 118 | # mkdocs documentation 119 | /site 120 | 121 | # mypy 122 | .mypy_cache/ 123 | .dmypy.json 124 | dmypy.json 125 | 126 | # Pyre type checker 127 | .pyre/ 128 | 129 | # vscode 130 | .vs 131 | .vscode/* 132 | 133 | # Pycharm 134 | .idea 135 | 136 | # TF code 137 | tensorflow_code 138 | 139 | # Models 140 | proc_data 141 | 142 | # examples 143 | runs 144 | /runs_old 145 | /wandb 146 | /examples/runs 147 | /examples/**/*.args 148 | /examples/rag/sweep 149 | 150 | # data 151 | /data 152 | serialization_dir 153 | 154 | # emacs 155 | *.*~ 156 | debug.env 157 | 158 | # vim 159 | .*.swp 160 | 161 | #ctags 162 | tags 163 | 164 | # pre-commit 165 | .pre-commit* 166 | 167 | # .lock 168 | *.lock 169 | 170 | # DS_Store (MacOS) 171 | .DS_Store 172 | 173 | # Always include .gitkeep files 174 | !.gitkeep 175 | !.vscode/settings.json -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "api-interfaces"] 2 | path = api-interfaces 3 | url = https://github.com/hafriedlander/api-interfaces.git 4 | [submodule "nonfree/ToMe"] 5 | path = nonfree/ToMe 6 | url = https://github.com/facebookresearch/ToMe.git 7 | [submodule "sdgrpcserver/src/k-diffusion"] 8 | path = sdgrpcserver/src/k-diffusion 9 | url = https://github.com/crowsonkb/k-diffusion.git 10 | [submodule "sdgrpcserver/src/ResizeRight"] 11 | path = sdgrpcserver/src/ResizeRight 12 | url = https://github.com/assafshocher/ResizeRight.git 13 | ignore = dirty 14 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.analysis.typeCheckingMode": "basic", 3 | "python.analysis.diagnosticMode": "workspace", 4 | "python.analysis.include": [ 5 | "sdgrpcserver/pipeline" 6 | ], 7 | "python.analysis.exclude": [ 8 | "sdgrpcserver/pipeline/kschedulers", 9 | "sdgrpcserver/pipeline/schedulers" 10 | ], 11 | "python.analysis.extraPaths": [ 12 | "sdgrpcserver/generated" 13 | ], 14 | 
"python.formatting.provider": "black", 15 | "python.linting.enabled": true, 16 | "python.linting.lintOnSave": true, 17 | "python.linting.flake8Enabled": true, 18 | "editor.rulers": [ 19 | 88, 20 | ], 21 | "editor.formatOnSave": true, 22 | "editor.codeActionsOnSave": { 23 | "source.organizeImports": true 24 | }, 25 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.6.2-devel-ubuntu20.04 AS devbase 2 | 3 | # Basic updates. Do super early so we can cache for a long time 4 | RUN apt update 5 | RUN apt install -y curl 6 | RUN apt install -y git 7 | 8 | # Set up core python environment 9 | RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba 10 | 11 | COPY environment.yaml . 12 | RUN /bin/micromamba -r /env -y create -f environment.yaml 13 | 14 | # Install dependancies 15 | ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu116 16 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install torch~=1.12.1 17 | 18 | 19 | 20 | 21 | FROM devbase AS regularbase 22 | 23 | # Install dependancies 24 | ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu116 25 | ENV FLIT_ROOT_INSTALL=1 26 | 27 | # We copy only the minimum for flit to run so avoid cache invalidation on code changes 28 | COPY pyproject.toml . 29 | COPY sdgrpcserver/__init__.py sdgrpcserver/ 30 | RUN touch README.md 31 | RUN /bin/micromamba -r /env -n sd-grpc-server run flit install --pth-file 32 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip cache purge 33 | 34 | # Setup NVM & Node for Localtunnel 35 | ENV NVM_DIR=/nvm 36 | ENV NODE_VERSION=16.18.0 37 | 38 | RUN mkdir -p $NVM_DIR 39 | 40 | RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash \ 41 | && . $NVM_DIR/nvm.sh \ 42 | && nvm install $NODE_VERSION \ 43 | && nvm alias default $NODE_VERSION \ 44 | && nvm use default 45 | 46 | 47 | 48 | 49 | # Build Xformers 50 | 51 | FROM devbase AS xformersbase 52 | 53 | RUN git clone https://github.com/facebookresearch/xformers.git 54 | WORKDIR /xformers 55 | RUN git submodule update --init --recursive 56 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install -r requirements.txt 57 | 58 | ENV FORCE_CUDA=1 59 | ENV TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.2;7.5;8.0;8.6" 60 | 61 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install . 62 | 63 | RUN tar cvjf /xformers.tbz /env/envs/sd-grpc-server/lib/python3.*/site-packages/xformers* 64 | 65 | 66 | 67 | 68 | FROM nvidia/cuda:11.6.2-base-ubuntu20.04 AS main 69 | 70 | COPY --from=regularbase /bin/micromamba /bin/ 71 | RUN mkdir -p /env/envs 72 | COPY --from=regularbase /env/envs /env/envs/ 73 | RUN mkdir -p /nvm 74 | COPY --from=regularbase /nvm /nvm/ 75 | 76 | # Setup NVM & Node for Localtunnel 77 | ENV NVM_DIR=/nvm 78 | ENV NODE_VERSION=16.18.0 79 | 80 | ENV NODE_PATH $NVM_DIR/versions/node/v$NODE_VERSION/lib/node_modules 81 | ENV PATH $NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH 82 | 83 | RUN npm install -g localtunnel 84 | 85 | # Now we can copy everything we need 86 | COPY sdgrpcserver /sdgrpcserver/ 87 | COPY server.py . 
88 | 89 | # Set up some config files 90 | RUN mkdir -p /huggingface 91 | RUN mkdir -p /weights 92 | RUN mkdir -p /config 93 | COPY sdgrpcserver/config/engines.yaml /config/engines.yaml 94 | 95 | # Set up some environment files 96 | 97 | ENV HF_HOME=/huggingface 98 | ENV HF_API_TOKEN=mustset 99 | ENV SD_ENGINECFG=/config/engines.yaml 100 | ENV SD_WEIGHT_ROOT=/weights 101 | 102 | CMD [ "/bin/micromamba", "-r", "env", "-n", "sd-grpc-server", "run", "python", "./server.py" ] 103 | 104 | 105 | 106 | 107 | FROM main as xformers 108 | 109 | COPY --from=xformersbase /xformers/requirements.txt / 110 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install -r requirements.txt 111 | RUN rm requirements.txt 112 | COPY --from=xformersbase /xformers.tbz / 113 | RUN tar xvjf /xformers.tbz 114 | RUN rm /xformers.tbz 115 | 116 | CMD [ "/bin/micromamba", "-r", "env", "-n", "sd-grpc-server", "run", "python", "./server.py" ] 117 | -------------------------------------------------------------------------------- /Dockerfile.cuda118: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 AS buildpytorch 2 | 3 | # Basic updates. Do super early so we can cache for a long time 4 | RUN apt update 5 | RUN apt install -y curl 6 | RUN apt install -y git 7 | 8 | # Set up core python environment 9 | RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba 10 | 11 | # Set up basic python 12 | RUN /bin/micromamba -r /env -y create -c defaults -n sd-grpc-server python~=3.10.0 pip 13 | # Install MKL from Intel - it's newer than conda-forge one 14 | RUN /bin/micromamba -r /env -n sd-grpc-server install -c intel mkl=2022.2.0 mkl-devel=2022.2.0 15 | # Install dependancies 16 | RUN /bin/micromamba -r /env -n sd-grpc-server install -c defaults astunparse numpy ninja pyyaml setuptools cmake cffi typing_extensions future six requests dataclasses 17 | 18 | # Make sure cmake looks in our enviroment 19 | ENV CMAKE_PREFIX_PATH=/env/envs/sd-grpc-server 20 | 21 | # Download magma 22 | RUN curl -OL http://icl.utk.edu/projectsfiles/magma/downloads/magma-2.6.2.tar.gz 23 | RUN tar xvzf magma-2.6.2.tar.gz 24 | RUN mkdir -p /magma-2.6.2/build 25 | 26 | # Modify magma CMakeFile to allow arbitrary CUDA arches 27 | WORKDIR /magma-2.6.2 28 | COPY docker_support/CMakeFile.txt.diff / 29 | RUN patch < /CMakeFile.txt.diff 30 | 31 | # Build it 32 | WORKDIR /magma-2.6.2/build 33 | RUN /bin/micromamba -r /env -n sd-grpc-server run cmake .. -DMAGMA_WITH_MKL=1 -DUSE_FORTRAN=off -DGPU_TARGET="All" -DCUDA_ARCH_LIST="-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89" 34 | #RUN /bin/micromamba -r /env -n sd-grpc-server run cmake .. 
-DMAGMA_WITH_MKL=1 -DUSE_FORTRAN=off -DGPU_TARGET="sm_89" -DCUDA_ARCH_LIST="-gencode arch=compute_89,code=sm_89" 35 | RUN /bin/micromamba -r /env -n sd-grpc-server run make lib 36 | RUN /bin/micromamba -r /env -n sd-grpc-server run make sparse-lib 37 | RUN /bin/micromamba -r /env -n sd-grpc-server run make install 38 | 39 | # Move into env (TODO: just build here in the first place) 40 | RUN mv /usr/local/magma/lib/*.so /env/envs/sd-grpc-server/lib/ 41 | RUN mv /usr/local/magma/lib/pkgconfig/*.pc /env/envs/sd-grpc-server/lib/pkgconfig/ 42 | RUN mv /usr/local/magma/include/* /env/envs/sd-grpc-server/include/ 43 | 44 | # Download pytorch 45 | WORKDIR / 46 | RUN git clone https://github.com/pytorch/pytorch 47 | 48 | WORKDIR /pytorch 49 | RUN git checkout -b v1.12.1 tags/v1.12.1 50 | RUN git submodule update --init --recursive 51 | 52 | # Built and install pytorch 53 | ENV FORCE_CUDA=1 54 | ENV TORCH_CUDA_ARCH_LIST="6.0;7.0;7.5;8.0;8.6;8.9" 55 | ENV USE_KINETO=0 56 | ENV USE_CUPTI=0 57 | ENV PYTORCH_BUILD_VERSION=1.12.1+cu118 58 | ENV PYTORCH_BUILD_NUMBER=0 59 | COPY docker_support/cpp_extension.py /pytorch/torch/utils/ 60 | RUN /bin/micromamba -r /env -n sd-grpc-server run python tools/generate_torch_version.py --is_debug false --cuda_version 11.8 61 | RUN /bin/micromamba -r /env -n sd-grpc-server run python setup.py install 62 | 63 | # Download torchvision 64 | WORKDIR / 65 | RUN git clone https://github.com/pytorch/vision 66 | 67 | WORKDIR /vision 68 | RUN git checkout -b v0.13.1 tags/v0.13.1 69 | 70 | # Install torchvision dependancies 71 | RUN /bin/micromamba -r /env -n sd-grpc-server install -c defaults libpng jpeg 72 | 73 | # Built and install torchvision 74 | ENV BUILD_VERSION=0.13.1+cu118 75 | RUN /bin/micromamba -r /env -n sd-grpc-server run python setup.py install 76 | 77 | 78 | 79 | 80 | FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 AS devbase 81 | 82 | # Basic updates. Do super early so we can cache for a long time 83 | RUN apt update 84 | RUN apt install -y curl 85 | RUN apt install -y git 86 | RUN apt install -y libaio-dev 87 | 88 | # Copy core python environment from pytorchbuild 89 | RUN mkdir -p /env 90 | COPY --from=buildpytorch /bin/micromamba /bin 91 | COPY --from=buildpytorch /env /env/ 92 | 93 | 94 | 95 | 96 | FROM devbase AS regularbase 97 | 98 | # Install dependancies 99 | ENV FLIT_ROOT_INSTALL=1 100 | RUN /bin/micromamba -r /env -n sd-grpc-server install -c defaults flit 101 | 102 | # We copy only the minimum for flit to run so avoid cache invalidation on code changes 103 | COPY pyproject.toml . 104 | COPY sdgrpcserver/__init__.py sdgrpcserver/ 105 | RUN touch README.md 106 | RUN /bin/micromamba -r /env -n sd-grpc-server run flit install --pth-file 107 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip cache purge 108 | 109 | # Setup NVM & Node for Localtunnel 110 | ENV NVM_DIR=/nvm 111 | ENV NODE_VERSION=16.18.0 112 | 113 | RUN mkdir -p $NVM_DIR 114 | 115 | RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash \ 116 | && . 
$NVM_DIR/nvm.sh \ 117 | && nvm install $NODE_VERSION \ 118 | && nvm alias default $NODE_VERSION \ 119 | && nvm use default 120 | 121 | 122 | 123 | 124 | FROM devbase AS tritonbase 125 | 126 | WORKDIR / 127 | ARG TRITON_REF=tags/v1.0 128 | RUN git clone https://github.com/openai/triton.git 129 | 130 | WORKDIR /triton 131 | RUN git checkout $TRITON_REF 132 | 133 | WORKDIR /triton/python 134 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install cmake 135 | RUN apt install -y zlib1g-dev libtinfo-dev 136 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install . 137 | 138 | RUN tar cvjf /triton.tbz /env/envs/sd-grpc-server/lib/python3.*/site-packages/triton* 139 | 140 | 141 | 142 | 143 | # Build Xformers 144 | 145 | FROM tritonbase AS xformersbase 146 | 147 | WORKDIR / 148 | ARG XFORMERS_REF=main 149 | RUN git clone https://github.com/facebookresearch/xformers.git 150 | 151 | WORKDIR /xformers 152 | RUN git checkout $XFORMERS_REF 153 | RUN git submodule update --init --recursive 154 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install -r requirements.txt 155 | 156 | ENV FORCE_CUDA=1 157 | ENV TORCH_CUDA_ARCH_LIST="6.0;7.0;7.5;8.0;8.6;8.9" 158 | 159 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install . 160 | 161 | RUN tar cvjf /xformers.tbz /env/envs/sd-grpc-server/lib/python3.*/site-packages/xformers* 162 | 163 | 164 | 165 | 166 | FROM tritonbase AS deepspeedbase 167 | 168 | WORKDIR / 169 | ARG DEEPSPEED_REF=tags/v0.7.4 170 | RUN git clone https://github.com/microsoft/DeepSpeed.git 171 | 172 | WORKDIR /DeepSpeed 173 | RUN git checkout $DEEPSPEED_REF 174 | RUN apt install -y libaio-dev 175 | 176 | ENV DS_BUILD_OPS=1 177 | ENV TORCH_CUDA_ARCH_LIST="6.0;7.0;7.5;8.0;8.6;8.9" 178 | ENV DS_BUILD_SPARSE_ATTN=0 179 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install . 180 | 181 | RUN tar cvjf /deepspeed.tbz /env/envs/sd-grpc-server/lib/python3.*/site-packages/deepspeed* 182 | 183 | 184 | 185 | 186 | 187 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04 AS main 188 | 189 | COPY --from=regularbase /bin/micromamba /bin/ 190 | RUN mkdir -p /env/envs 191 | COPY --from=regularbase /env/envs /env/envs/ 192 | RUN mkdir -p /nvm 193 | COPY --from=regularbase /nvm /nvm/ 194 | 195 | # Setup NVM & Node for Localtunnel 196 | ENV NVM_DIR=/nvm 197 | ENV NODE_VERSION=16.18.0 198 | 199 | ENV NODE_PATH $NVM_DIR/versions/node/v$NODE_VERSION/lib/node_modules 200 | ENV PATH $NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH 201 | 202 | RUN npm install -g localtunnel 203 | 204 | # Now we can copy everything we need 205 | COPY nonfree /nonfree/ 206 | COPY sdgrpcserver /sdgrpcserver/ 207 | COPY server.py . 
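# Note: unlike the image built from the default Dockerfile, this one also copies
# in the nonfree folder (ToMe etc.), so as described in the README's License
# section it corresponds to the 'noncomm' images and cannot be used commercially.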
208 | 209 | # Set up some config files 210 | RUN mkdir -p /huggingface 211 | RUN mkdir -p /weights 212 | RUN mkdir -p /config 213 | COPY sdgrpcserver/config/engines.yaml /config/engines.yaml 214 | 215 | # Set up some environment files 216 | 217 | ENV HF_HOME=/huggingface 218 | ENV HF_API_TOKEN=mustset 219 | ENV SD_ENGINECFG=/config/engines.yaml 220 | ENV SD_WEIGHT_ROOT=/weights 221 | 222 | CMD [ "/bin/micromamba", "-r", "env", "-n", "sd-grpc-server", "run", "python", "./server.py" ] 223 | 224 | 225 | 226 | 227 | FROM main as xformers 228 | 229 | COPY --from=xformersbase /xformers/requirements.txt / 230 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install -r requirements.txt 231 | RUN rm requirements.txt 232 | 233 | COPY --from=deepspeedbase /DeepSpeed/requirements/requirements.txt / 234 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install -r requirements.txt 235 | RUN rm requirements.txt 236 | 237 | COPY --from=tritonbase /triton.tbz / 238 | RUN tar xvjf /triton.tbz 239 | COPY --from=xformersbase /xformers.tbz / 240 | RUN tar xvjf /xformers.tbz 241 | COPY --from=deepspeedbase /deepspeed.tbz / 242 | RUN tar xvjf /deepspeed.tbz 243 | 244 | RUN rm /*.tbz 245 | 246 | CMD [ "/bin/micromamba", "-r", "env", "-n", "sd-grpc-server", "run", "python", "./server.py" ] 247 | 248 | 249 | -------------------------------------------------------------------------------- /Dockerfile.protoc: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | 3 | RUN pip install grpcio==1.49.1 4 | RUN pip install grpcio-tools==1.49.1 5 | 6 | COPY docker_support/build_protoc.sh / 7 | 8 | RUN mkdir /src 9 | WORKDIR /src 10 | 11 | CMD [ "/build_protoc.sh" ] 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | An implementation of a server for the Stability AI API 2 | 3 | # Features 4 | 5 | ## Standard Stable Diffusion features 6 | 7 | - Create an image from just a text prompt (txt2img) 8 | - Create an image from an existing image and a text prompt (img2img) 9 | - Fill in a hole in an image, or extend an image (inpainting) 10 | 11 | ## Enhancements 12 | 13 | - Enhanced inpainting and outpainting, including Grafted Inpainting 14 | - When used with the standard Stable Diffusion V1.5 model, results are more consistent with the existing image 15 | - When used with a model such as Waifu Diffusion that does not have an inpaint model, can either "graft" 16 | the model on top of the Stable Diffusion inpainting model or work in an exclusive model-independent mode 17 | - Custom CLIP guidance allows using newer CLIP models to more accurately follow prompts 18 | - Faster and better results than the standard Diffusers version 19 | - Negative prompting and weighting of parts of a prompt (send multiple `Prompt` objects with `text` and any positive or negative `weight`) 20 | - All K_Diffusion schedulers available, and working correctly (including DPM2, DPM2 Ancestral and Heun) 21 | - Can load multiple pipelines, such as Stable and Waifu Diffusion, and swap between them as needed 22 | - Adjustable NSFW behaviour 23 | - Potentially lower memory requirements using a variety of model offloading techniques 24 | - Cancel over API (using GRPC cancel will abort the currently in-progress generation) 25 | - Various performance optimisations 26 | + XFormers support, if installed 27 | + ToMe support, if nonfree code included (recommend XFormers instead where
available, but ToMe doesn't have complicated dependencies) 28 | 29 | # Installation 30 | 31 | ## Colab (coming soon) 32 | 33 | ## Docker (easiest if you already have Docker, and an Nvidia GPU with 4GB+ VRAM) 34 | 35 | ``` 36 | docker run --gpus all -it -p 50051:50051 \ 37 | -e HF_API_TOKEN={your huggingface token} \ 38 | -e SD_LISTEN_TO_ALL=1 \ 39 | -v $HOME/.cache/huggingface:/huggingface \ 40 | -v `pwd`/weights:/weights \ 41 | hafriedlander/stable-diffusion-grpcserver:xformers-latest 42 | ``` 43 | 44 | #### Localtunnel 45 | 46 | The docker image has built-in support for localtunnel, which 47 | will expose the GRPC-WEB endpoint on an https domain. It will 48 | automatically set an access token key if you don't provide one. 49 | Check your Docker log for the values to use. 50 | 51 | ``` 52 | -e SD_LOCALTUNNEL=1 \ 53 | ``` 54 | 55 | #### Volume mounts 56 | 57 | This will share the weights and huggingface cache, but you can 58 | mount other folders into the container to do other things: 59 | 60 | - You can check out the latest version of the server code and then 61 | mount it into the container to run the very latest code (including 62 | any local edits you make) 63 | 64 | ``` 65 | -v `pwd`/sdgrpcserver:/sdgrpcserver \ 66 | ``` 67 | 68 | - Or override the engines.yaml config by making a config directory, 69 | putting the engines.yaml in there, and mounting it into the container 70 | 71 | ``` 72 | -v `pwd`/config:/config \ 73 | ``` 74 | 75 | All the server arguments can be provided as environment variables, starting 76 | with SD: 77 | 78 | - SD_ENGINECFG 79 | - SD_GRPC_PORT 80 | - SD_HTTP_PORT 81 | - SD_VRAM_OPTIMISATION_LEVEL 82 | - SD_NSFW_BEHAVIOUR 83 | - SD_WEIGHT_ROOT 84 | - SD_HTTP_FILE_ROOT 85 | - SD_ACCESS_TOKEN 86 | - SD_LISTEN_TO_ALL 87 | - SD_ENABLE_MPS 88 | - SD_RELOAD 89 | - SD_LOCALTUNNEL 90 | 91 | #### Building the image locally 92 | 93 | ``` 94 | docker build --target main . 95 | ``` 96 | 97 | Or to build (slowly) with xformers 98 | 99 | ``` 100 | docker build --target xformers .
101 | ``` 102 | 103 | ## Locally (if you have an Nvidia GPU with 4GB+ VRAM, and prefer not to use Docker) 104 | 105 | ### Option 1 (recommended): 106 | 107 | Install Miniconda, then in a Conda console: 108 | 109 | ``` 110 | git clone https://github.com/hafriedlander/stable-diffusion-grpcserver.git 111 | cd stable-diffusion-grpcserver 112 | conda env create -f environment.yaml 113 | conda activate sd-grpc-server 114 | ``` 115 | 116 | Then for Windows: 117 | 118 | ``` 119 | set PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu116 120 | flit install --pth-file 121 | set HF_API_TOKEN={your huggingface token} 122 | python ./server.py 123 | ``` 124 | 125 | Or for Linux: 126 | 127 | ``` 128 | PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu116 flit install --pth-file 129 | HF_API_TOKEN={your huggingface token} python ./server.py 130 | ``` 131 | 132 | ### Option 2: 133 | 134 | Create a directory and download https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/main/engines.yaml into it, then: 135 | 136 | ``` 137 | set PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu116 138 | pip install stable-diffusion-grpcserver 139 | set HF_API_TOKEN={your huggingface token} 140 | sdgrpcserver 141 | ``` 142 | 143 | 144 | # Thanks to / Credits: 145 | 146 | - Seamless outpainting https://github.com/parlance-zz/g-diffuser-bot/tree/g-diffuser-bot-beta2 147 | - Additional schedulers https://github.com/hlky/diffusers 148 | - K-Diffusion integration example https://github.com/Birch-san/diffusers/blob/1472b70194ae6d7e51646c0d6787815a5bc65f75/examples/community/play.py 149 | 150 | # Roadmap 151 | 152 | Core API functions not working yet: 153 | 154 | - ChainGenerate not implemented 155 | 156 | Extra features to add: 157 | 158 | - Progress reporting over the API is included but not exposed yet 159 | - Embedding params in png 160 | - Extra APIs 161 | - Image resizing 162 | - Aspect ratio shifting 163 | - Asset management 164 | - Extension negotiation so we can: 165 | - Ping back progress notices 166 | - Allow cancellation requests 167 | - Specify negative prompts 168 | - Community features: 169 | - Prompt calculation https://github.com/pharmapsychotic/clip-interrogator/blob/main/clip_interrogator.ipynb 170 | - Prompt suggestion https://huggingface.co/spaces/Gustavosta/MagicPrompt-Stable-Diffusion 171 | - Prompt compositing https://github.com/energy-based-model/Compositional-Visual-Generation-with-Composable-Diffusion-Models-PyTorch 172 | - Automasking https://github.com/ThstereforeGames/txt2mask 173 | - Huge seeds 174 | 175 | 176 | # License 177 | 178 | The main codebase is distributed under Apache-2.0. Dependencies are all compatible with that license, except as noted here: 179 | 180 | - The nonfree directory contains code under some license that is more restrictive than Apache-2.0. Check the individual 181 | projects for license details. To fully comply with the Apache-2.0 license, remove this folder before release. 182 | + ToMe 183 | + Codeformer 184 | - The Docker images contain a bunch of software under various open source licenses. The docker images tagged 'noncomm' 185 | include the nonfree folder, and so cannot be used commercially.
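As a rough sketch (not an official client), a running server can be called with the stubs generated into `sdgrpcserver/generated`. The exact `Request` and `Prompt` fields are defined by `generation.proto` in the `api-interfaces` submodule, so the field names below are illustrative only:

```
import sys

import grpc

# The generated modules import each other by bare name, so put the generated
# directory itself on the path rather than importing it as a package
sys.path.append("sdgrpcserver/generated")

import generation_pb2
import generation_pb2_grpc

channel = grpc.insecure_channel("localhost:50051")  # default GRPC port
stub = generation_pb2_grpc.GenerationServiceStub(channel)

# Illustrative field names - check generation.proto for the real schema
request = generation_pb2.Request(
    engine_id="stable-diffusion-v1-5",
    prompt=[generation_pb2.Prompt(text="a lighthouse on a cliff at sunset")],
)

# Generate is a server-streaming RPC, so iterate the stream of Answer messages
for answer in stub.Generate(request):
    ...  # inspect answer.artifacts and save any image data
```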
186 | 187 | [![Stable Cabal Logo](stablecabal.png)](https://www.stablecabal.org/) 188 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | git submodule update --init --recursive 4 | docker run --rm -it -v `pwd`:/src $(docker build -q . -f Dockerfile.protoc) bash /build_protoc.sh 5 | -------------------------------------------------------------------------------- /docker_support/CMakeFile.txt.diff: -------------------------------------------------------------------------------- 1 | --- CMakeLists.txt.orig 2022-04-20 21:35:50.000000000 +0000 2 | +++ CMakeLists.txt 2022-10-19 23:14:02.999699993 +0000 3 | @@ -130,11 +130,11 @@ 4 | # NV_SM accumulates sm_xx for all requested versions 5 | # NV_COMP is compute_xx for highest requested version 6 | set( NV_SM "" ) 7 | set( NV_COMP "" ) 8 | 9 | - set(CUDA_SEPARABLE_COMPILATION ON) 10 | + set(CUDA_SEPARABLE_COMPILATION OFF) 11 | 12 | # nvcc >= 6.5 supports -std=c++11, so propagate CXXFLAGS to NVCCFLAGS. 13 | # Older nvcc didn't support -std=c++11, so previously we disabled propagation. 14 | ##if (${CMAKE_CXX_FLAGS} MATCHES -std=) 15 | ## set( CUDA_PROPAGATE_HOST_FLAGS OFF ) 16 | @@ -292,15 +292,31 @@ 17 | set( NV_SM ${NV_SM} -gencode arch=compute_80,code=sm_80 ) 18 | set( NV_COMP -gencode arch=compute_80,code=compute_80 ) 19 | message( STATUS " compile for CUDA arch 8.0 (Ampere)" ) 20 | endif() 21 | 22 | + if (GPU_TARGET MATCHES sm_89) 23 | + if (NOT MIN_ARCH) 24 | + set( MIN_ARCH 890 ) 25 | + endif() 26 | + set( NV_SM ${NV_SM} -gencode arch=compute_89,code=sm_89 ) 27 | + set( NV_COMP -gencode arch=compute_89,code=compute_89 ) 28 | + message( STATUS " compile for CUDA arch 8.9 (Ada Lovelace)" ) 29 | + endif() 30 | + 31 | + if ( ${GPU_TARGET} MATCHES "All") 32 | + set( MIN_ARCH 600 ) 33 | + SET( NV_SM ${CUDA_ARCH_LIST}) 34 | + SET( NV_COMP "") 35 | + endif() 36 | + 37 | if (NOT MIN_ARCH) 38 | message( FATAL_ERROR "GPU_TARGET must contain one or more of Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, or valid sm_[0-9][0-9]" ) 39 | endif() 40 | 41 | - set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION} ) 42 | + #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION} ) 43 | + set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -DHAVE_CUBLAS -Xfatbin -compress-all -Xcompiler -fPIC ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION} ) 44 | #add_definitions( "-DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MIN_ARCH}" ) 45 | set(MAGMA_HAVE_CUDA "1") 46 | set(MAGMA_CUDA_ARCH_MIN "${MIN_ARCH}") 47 | message( STATUS "Define -DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MIN_ARCH}" ) 48 | else() 49 | @@ -749,11 +765,11 @@ 50 | file( GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" ) 51 | else() 52 | file( GLOB headers include/*.h sparse_hip/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" ) 53 | endif() 54 | if (USE_FORTRAN) 55 | - install( FILES ${headers} ${modules} 56 | + install( FILES ${headers} 57 | DESTINATION include ) 58 | else() 59 | install( FILES ${headers} DESTINATION include ) 60 | endif() 61 | -------------------------------------------------------------------------------- /docker_support/build_protoc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python -m grpc_tools.protoc -Iapi-interfaces/src/tensorizer/proto --python_out=sdgrpcserver/generated 
--grpc_python_out=sdgrpcserver/generated api-interfaces/src/tensorizer/proto/tensors.proto 4 | python -m grpc_tools.protoc -Iapi-interfaces/src/tensorizer/proto -Iapi-interfaces/src/proto --python_out=sdgrpcserver/generated --grpc_python_out=sdgrpcserver/generated api-interfaces/src/proto/generation.proto 5 | python -m grpc_tools.protoc -Iapi-interfaces/src/tensorizer/proto -Iapi-interfaces/src/proto --python_out=sdgrpcserver/generated --grpc_python_out=sdgrpcserver/generated api-interfaces/src/proto/engines.proto 6 | python -m grpc_tools.protoc -Iapi-interfaces/src/tensorizer/proto -Iapi-interfaces/src/proto --python_out=sdgrpcserver/generated --grpc_python_out=sdgrpcserver/generated api-interfaces/src/proto/dashboard.proto 7 | 8 | -------------------------------------------------------------------------------- /docker_support/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -z ${SD_CLOUDFLARE} ]; then 3 | SERVICE="python" 4 | if pgrep -x "$SERVICE" >/dev/null 5 | then 6 | echo "server is running" 7 | else 8 | /bin/micromamba -r env -n sd-grpc-server run python ./server.py 9 | fi 10 | else 11 | SERVICE="python" 12 | if pgrep -x "$SERVICE" >/dev/null 13 | then 14 | echo "server is running" 15 | else 16 | /bin/micromamba -r env -n sd-grpc-server run python ./server.py & 17 | fi 18 | FILE=./cloudflared-linux-amd64 19 | if [ ! -f "$FILE" ]; then 20 | wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 21 | chmod +x cloudflared-linux-amd64 22 | fi 23 | ./cloudflared-linux-amd64 tunnel --url http://localhost:5000 24 | fi 25 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: sd-grpc-server 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - git 7 | - python=3.10 8 | - pip 9 | - flit 10 | - psutil 11 | -------------------------------------------------------------------------------- /nonfree/README.md: -------------------------------------------------------------------------------- 1 | Projects in this folder have a license that is more restricted that the global project license 2 | 3 | For strict distribution and compliance with Apache-2.0, remove this folder prior to distribution 4 | 5 | stable-diffusion-grpcserver will work fine without these files, and is not a derivative work. 6 | -------------------------------------------------------------------------------- /nonfree/tome_memory_efficient_cross_attention.py: -------------------------------------------------------------------------------- 1 | 2 | # A merge of the CrossAttention blocks from ToMe and MemoryEfficientCrossAttention 3 | # (C) Hamish Friedlander 2022, All Rights Reserved. 
Distributable under the same license as ToMe 4 | 5 | from typing import Tuple, Union 6 | 7 | import torch 8 | from diffusers.models.attention import CrossAttention 9 | from tome.merge import bipartite_soft_matching, merge_source, merge_wavg 10 | from tome.utils import parse_r 11 | 12 | try: 13 | import xformers 14 | import xformers.ops 15 | except: 16 | xformers = None 17 | 18 | def has_xformers(): 19 | return xformers is not None 20 | 21 | class ToMeMemoryEfficientCrossAttention(CrossAttention): 22 | def forward(self, hidden_states, context=None, mask=None): 23 | 24 | # This bit from ToMe 25 | 26 | batch_size, sequence_length, _ = hidden_states.shape 27 | 28 | q = self.to_q(hidden_states) 29 | context = context if context is not None else hidden_states 30 | k = self.to_k(context) 31 | v = self.to_v(context) 32 | dim = q.shape[-1] 33 | r = self._tome_info["r"].pop(0) 34 | if r > 0: 35 | # Apply ToMe here 36 | merge, _ = bipartite_soft_matching( 37 | k, 38 | r, 39 | self._tome_info["class_token"], 40 | self._tome_info["distill_token"], 41 | ) 42 | if self._tome_info["trace_source"]: 43 | self._tome_info["source"] = merge_source( 44 | merge, k, self._tome_info["source"] 45 | ) 46 | self._tome_info["source"] = merge_source( 47 | merge, v, self._tome_info["source"] 48 | ) 49 | k, self._tome_info["size"] = merge_wavg(merge, k) 50 | v, self._tome_info["size"] = merge_wavg(merge, v) 51 | 52 | # This bit from MemoryEfficientCrossAttention 53 | 54 | b, _, _ = q.shape 55 | q, k, v = map( 56 | lambda t: t.unsqueeze(3) 57 | .reshape(b, t.shape[1], self.heads, self.dim_head) 58 | .permute(0, 2, 1, 3) 59 | .reshape(b * self.heads, t.shape[1], self.dim_head) 60 | .contiguous(), 61 | (q, k, v), 62 | ) 63 | 64 | # actually compute the attention, what we cannot get enough of 65 | out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None, op=None) 66 | 67 | # TODO: Use this directly in the attention operation, as a bias 68 | if mask is not None: 69 | raise NotImplementedError 70 | out = ( 71 | out.unsqueeze(0) 72 | .reshape(b, self.heads, out.shape[1], self.dim_head) 73 | .permute(0, 2, 1, 3) 74 | .reshape(b, out.shape[1], self.heads * self.dim_head) 75 | ) 76 | return self.to_out(out) 77 | -------------------------------------------------------------------------------- /nonfree/tome_patcher.py: -------------------------------------------------------------------------------- 1 | from diffusers import UNet2DConditionModel 2 | from diffusers.models.attention import SpatialTransformer, BasicTransformerBlock 3 | 4 | from nonfree.tome_unet import ToMeUNet, ToMeSpatialTransformer, ToMeCrossAttention 5 | from nonfree.tome_memory_efficient_cross_attention import has_xformers, ToMeMemoryEfficientCrossAttention 6 | 7 | from sdgrpcserver.pipeline.models.memory_efficient_cross_attention import MemoryEfficientCrossAttention 8 | 9 | def apply_tome( 10 | model: UNet2DConditionModel, trace_source: bool = False, prop_attn: bool = True 11 | ): 12 | """ 13 | Applies ToMe to this transformer. Afterward, set r using model.r. 14 | 15 | If you want to know the source of each token (e.g., for visualization), set trace_source = true. 16 | The sources will be available at model._tome_info["source"] afterward. 17 | 18 | For proportional attention, set prop_attn to True. This is only necessary when evaluating models off 19 | the shelf. For trianing and for evaluating MAE models off the self set this to be False. 
20 | """ 21 | 22 | model.__class__ = ToMeUNet 23 | model.r = 0 24 | model._tome_info = { 25 | "r": model.r, 26 | "size": None, 27 | "source": None, 28 | "trace_source": trace_source, 29 | "prop_attn": prop_attn, 30 | "class_token": False, 31 | "distill_token": False, 32 | } 33 | 34 | if hasattr(model, "dist_token") and model.dist_token is not None: 35 | model._tome_info["distill_token"] = True 36 | 37 | for module in model.modules(): 38 | if isinstance(module, SpatialTransformer): 39 | module.__class__ = ToMeSpatialTransformer 40 | if isinstance(module, BasicTransformerBlock): 41 | #module.__class__ = ToMeTransformerBlock 42 | #module._tome_info = model._tome_info 43 | if isinstance(module.attn1, MemoryEfficientCrossAttention): 44 | module.attn1.__class__ = ToMeMemoryEfficientCrossAttention 45 | else: 46 | module.attn1.__class__ = ToMeCrossAttention 47 | module.attn1._tome_info = model._tome_info 48 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["flit_core >=3.2,<4"] 3 | build-backend = "flit_core.buildapi" 4 | 5 | [project] 6 | name = "stable-diffusion-grpcserver" 7 | authors = [{ name = "Hamish Friedlander", email = "hafriedlander@gmail.com" }] 8 | readme = "README.md" 9 | license = { file = "LICENSE" } 10 | classifiers = ["License :: OSI Approved :: Apache Software License"] 11 | dynamic = ["version", "description"] 12 | dependencies = [ 13 | # Core pipeline 14 | "torch ~= 1.12.1", 15 | "einops ~= 0.5.0", 16 | "torchvision ~= 0.13.1", 17 | "numpy ~= 1.23.3", 18 | "opencv-python-headless ~= 4.6.0.66", 19 | "scipy ~= 1.9.1", 20 | "ftfy ~= 6.1.1", 21 | "transformers ~= 4.25.1", 22 | "diffusers ~= 0.10.2", 23 | "accelerate ~= 0.13.2", 24 | "easing-functions ~= 1.0.4", 25 | # For ToMe 26 | "timm ~= 0.6.11", 27 | # For Structured Diffusion 28 | "nltk ~= 3.7", 29 | "stanza ~= 1.4.2", 30 | # For K-Diffusion 31 | "torchdiffeq ~= 0.2.3", 32 | "torchsde ~= 0.2.5", 33 | # For Server 34 | "protobuf ~= 3.20", 35 | "grpcio ~= 1.48.1", 36 | "wsgicors ~= 0.7.0", 37 | "Twisted ~= 22.8.0", 38 | "hupper ~= 1.10.3", 39 | "watchdog ~= 2.1.9", 40 | "python-dotenv ~= 0.21.0", 41 | "service_identity ~= 21.1.0", 42 | # For Tests 43 | "pynvml ~= 11.4.1", 44 | ] 45 | 46 | [project.optional-dependencies] 47 | dev = ["black ~= 22.10.0", "flake8 ~= 6.0.0", "flake8-pyproject ~= 1.2.1"] 48 | 49 | [project.urls] 50 | Home = "https://github.com/hafriedlander/stable-diffusion-grpcserver" 51 | 52 | [project.scripts] 53 | sdgrpcserver = "sdgrpcserver.server:main" 54 | 55 | [tool.flit.module] 56 | name = "sdgrpcserver" 57 | 58 | [tool.isort] 59 | profile = "black" 60 | 61 | [tool.flake8] 62 | max-line-length = 88 63 | select = "C,E,F,W,B,B950" 64 | extend-ignore = "E203, E501, W503" 65 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Use the CUDA-enabled versions of pytorch 2 | --extra-index-url https://download.pytorch.org/whl/cu116 3 | 4 | # Python AI basics 5 | torch~=1.12.1 6 | torchvision~=0.13.1 7 | numpy~=1.23.3 8 | opencv-python~=4.6.0.66 9 | scipy~=1.9.1 10 | 11 | # Transformers 12 | transformers~=4.22.1 13 | -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers 14 | -e git+https://github.com/openai/CLIP.git@main#egg=clip 15 | diffusers~=0.4.1 16 | 17 | # Server libraries 18 | 
protobuf~=3.20 19 | grpcio~=1.48.1 20 | Flask~=2.2.2 21 | wsgicors~=0.7.0 22 | waitress~=2.1.2 23 | hupper~=1.10.3 24 | watchdog~=2.1.9 25 | 26 | # Additional AI libraries 27 | # These come from "taming transformers" or "diffusers" environment.yaml 28 | # and probably aren't needed for Stable Diffusion inferance 29 | #albumentations==0.4.3 30 | #pytorch-lightning==1.4.2 31 | #test-tube>=0.7.5 32 | #einops==0.3.0 33 | #torch-fidelity==0.3.0 34 | #torchmetrics==0.6.0 35 | #invisible-watermark 36 | 37 | # Other stuff from the taming transformers or diffusers environment.yaml 38 | #pudb==2019.2 39 | #imageio==2.9.0 40 | #imageio-ffmpeg==0.4.2 41 | #omegaconf==2.1.1 42 | #streamlit>=0.73.1 43 | -------------------------------------------------------------------------------- /sdgrpcserver/__init__.py: -------------------------------------------------------------------------------- 1 | """A local Stable Diffusion AI image generation server compatible with the Stability-AI GRPC protocol'""" 2 | 3 | __version__ = "0.0.1" -------------------------------------------------------------------------------- /sdgrpcserver/config/dist_hashes: -------------------------------------------------------------------------------- 1 | cdb93bad7d27d5825d4dc2925173442b04540d81 2 | 3bf70ef84926776a999f92d2641493942687dcbc 3 | b334b20ec2b037f126bf61b4fc780bbff9004283 4 | f327f51ccbc055a3e50596fd889bfd6abf60ef46 5 | 7fc705e0e387d4fb762098145f72e650cf00fd3a 6 | e4f24033e5ad4063b6b69758119920a389b2df8d 7 | b4e4a1cdb19b459617f5e0c2e670e2079911bf62 8 | 72e706b173461c04cae5607810b31ac425c1e719 9 | 2147a0769fbdd03c2157cc9394542eca089d2f21 10 | 74228d116acbc1ee462eae5635a69d0975952841 11 | 5c2f2449e58a306f937c36bdc97ea039d337aa23 12 | 7dc95fb6dfa8fde48b66052d2044d1a1d8302964 13 | c639b7da0becbd74306ab94a1490e7811322b63c 14 | 88dcc9c93e23dce7723cbdc1bb55b14d80d13aaa 15 | 05e1fb9c1700f0833b8ffb593440f7000dc4cd35 16 | a6ab03a095ba36d06b15af41649989b38cd918b9 17 | c3e31f14155e6db72d2a67e6883ff14f16689332 18 | a21f0ca8937fce9638b729c38cac3ec288903ef7 19 | ed6826bb6a2e86c558c7eda5f65a8bc10bf28438 20 | 606b469ecbe60b74ab0006768b045c353239db9c 21 | -------------------------------------------------------------------------------- /sdgrpcserver/config/genhashes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | (for x in `git log --pretty=format:"%H" --diff-filter=d --reverse -- ../../engines.yaml` ; do git rev-parse "${x}:../../engines.yaml" ; done) > dist_hashes 3 | (for x in `git log --pretty=format:"%H" --diff-filter=d --reverse -- ./engines.yaml` ; do git rev-parse "${x}:./engines.yaml" ; done) >> dist_hashes 4 | 5 | -------------------------------------------------------------------------------- /sdgrpcserver/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | debug_path = os.environ.get("SD_DEBUG_PATH", False) 4 | if not debug_path: debug_path = os.path.join( 5 | os.path.dirname(os.path.dirname(__file__)), 6 | "/tests/out/" 7 | ) 8 | -------------------------------------------------------------------------------- /sdgrpcserver/debug_recorder.py: -------------------------------------------------------------------------------- 1 | import glob, os, tempfile, platform, time 2 | 3 | import yaml 4 | try: 5 | from yaml import CLoader as Loader, CDumper as Dumper 6 | except ImportError: 7 | from yaml import Loader, Dumper 8 | 9 | try: 10 | import gzip 11 | except: 12 | gzip = None 13 | 14 | record_modules = [ 15 | "torch", 
16 | "torchvision", 17 | "numpy", 18 | "opencv-python-headless", 19 | "scipy", 20 | "transformers", 21 | "diffusers", 22 | ] 23 | 24 | try: 25 | from importlib.metadata import version 26 | def get_module_version(module): return version(module) 27 | except: 28 | import pkg_resources 29 | def get_module_version(module): return pkg_resources.get_distribution(module).version 30 | 31 | class DebugContext: 32 | def __init__(self, recorder, label): 33 | self.recorder = recorder 34 | self.events = [] 35 | self.store('label', label) 36 | self.store('uname', platform.uname()) 37 | self.store('python version', platform.python_version()) 38 | self.store('module versions', self.get_module_versions()) 39 | 40 | def get_module_versions(self): 41 | return {module: get_module_version(module) for module in record_modules} 42 | 43 | def store(self, label, data): 44 | self.events.append((label, data)) 45 | 46 | def __enter__(self): 47 | return self 48 | 49 | def __exit__(self, exc_type, exc_value, exc_traceback): 50 | if exc_type: 51 | self.store('exception', [exc_type, exc_value, exc_traceback]) 52 | 53 | self.recorder.store(self.events) 54 | 55 | class DebugRecorder: 56 | def __init__(self, storage_time=10*60): 57 | self.storage_time = storage_time 58 | self.storage_path = os.path.join(tempfile.gettempdir(), "sdgrpcserver_debug") 59 | 60 | if not os.path.exists(self.storage_path): os.mkdir(self.storage_path) 61 | 62 | def garbage_collect(self): 63 | now = time.time() 64 | for path in glob.glob(os.path.join(self.storage_path, "*.dump*")): 65 | mtime = os.path.getmtime(path) 66 | if mtime < now - self.storage_time: 67 | print("Debug record expired: ", path) 68 | os.unlink(path) 69 | 70 | def record(self, label): 71 | return DebugContext(self, label) 72 | 73 | def store(self, events): 74 | now = time.time() 75 | path = f"debug-{now}.dump" 76 | data = yaml.dump(events, Dumper=Dumper) 77 | 78 | if gzip: 79 | path = f"debug-{now}.dump.gz" 80 | data = gzip.compress(bytes(data, "utf8")) 81 | 82 | with open(os.path.join(self.storage_path, path), "wb") as f: 83 | f.write(data) 84 | 85 | self.garbage_collect() 86 | 87 | class DebugNullRecorder: 88 | def __init__(self): 89 | pass 90 | 91 | def record(self, label): 92 | return self 93 | 94 | def __enter__(self): 95 | return self 96 | 97 | def __exit__(self, exc_type, exc_value, exc_traceback): 98 | pass 99 | 100 | def get_module_versions(self): 101 | return {} 102 | 103 | def store(self, label, data): 104 | pass 105 | 106 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/engines_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # source: engines.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf.internal import builder as _builder 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import descriptor_pool as _descriptor_pool 8 | from google.protobuf import symbol_database as _symbol_database 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | import generation_pb2 as generation__pb2 15 | 16 | 17 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rengines.proto\x12\x07gooseai\x1a\x10generation.proto\"\xdf\x01\n\rEngineSampler\x12*\n\x07sampler\x18\x01 \x01(\x0e\x32\x19.gooseai.DiffusionSampler\x12\x14\n\x0csupports_eta\x18\n \x01(\x08\x12\x16\n\x0esupports_churn\x18\x0b \x01(\x08\x12\x1d\n\x15supports_sigma_limits\x18\x0c \x01(\x08\x12\x1b\n\x13supports_karras_rho\x18\r \x01(\x08\x12\x38\n\x15supported_noise_types\x18\x14 \x03(\x0e\x32\x19.gooseai.SamplerNoiseType\"\xde\x01\n\nEngineInfo\x12\n\n\x02id\x18\x01 \x01(\t\x12\r\n\x05owner\x18\x02 \x01(\t\x12\r\n\x05ready\x18\x03 \x01(\x08\x12!\n\x04type\x18\x04 \x01(\x0e\x32\x13.gooseai.EngineType\x12+\n\ttokenizer\x18\x05 \x01(\x0e\x32\x18.gooseai.EngineTokenizer\x12\x0c\n\x04name\x18\x06 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x07 \x01(\t\x12\x33\n\x12supported_samplers\x18\xf4\x03 \x03(\x0b\x32\x16.gooseai.EngineSampler\"\x14\n\x12ListEnginesRequest\".\n\x07\x45ngines\x12#\n\x06\x65ngine\x18\x01 \x03(\x0b\x32\x13.gooseai.EngineInfo*Z\n\nEngineType\x12\x08\n\x04TEXT\x10\x00\x12\x0b\n\x07PICTURE\x10\x01\x12\t\n\x05\x41UDIO\x10\x02\x12\t\n\x05VIDEO\x10\x03\x12\x12\n\x0e\x43LASSIFICATION\x10\x04\x12\x0b\n\x07STORAGE\x10\x05*%\n\x0f\x45ngineTokenizer\x12\x08\n\x04GPT2\x10\x00\x12\x08\n\x04PILE\x10\x01\x32P\n\x0e\x45nginesService\x12>\n\x0bListEngines\x12\x1b.gooseai.ListEnginesRequest\x1a\x10.gooseai.Engines\"\x00\x42\x0cZ\n./;enginesb\x06proto3') 18 | 19 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) 20 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'engines_pb2', globals()) 21 | if _descriptor._USE_C_DESCRIPTORS == False: 22 | 23 | DESCRIPTOR._options = None 24 | DESCRIPTOR._serialized_options = b'Z\n./;engines' 25 | _ENGINETYPE._serialized_start=565 26 | _ENGINETYPE._serialized_end=655 27 | _ENGINETOKENIZER._serialized_start=657 28 | _ENGINETOKENIZER._serialized_end=694 29 | _ENGINESAMPLER._serialized_start=45 30 | _ENGINESAMPLER._serialized_end=268 31 | _ENGINEINFO._serialized_start=271 32 | _ENGINEINFO._serialized_end=493 33 | _LISTENGINESREQUEST._serialized_start=495 34 | _LISTENGINESREQUEST._serialized_end=515 35 | _ENGINES._serialized_start=517 36 | _ENGINES._serialized_end=563 37 | _ENGINESSERVICE._serialized_start=696 38 | _ENGINESSERVICE._serialized_end=776 39 | # @@protoc_insertion_point(module_scope) 40 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/engines_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | 5 | import engines_pb2 as engines__pb2 6 | 7 | 8 | class EnginesServiceStub(object): 9 | """Missing associated documentation comment in .proto file.""" 10 | 11 | def __init__(self, channel): 12 | """Constructor. 13 | 14 | Args: 15 | channel: A grpc.Channel. 
16 | """ 17 | self.ListEngines = channel.unary_unary( 18 | '/gooseai.EnginesService/ListEngines', 19 | request_serializer=engines__pb2.ListEnginesRequest.SerializeToString, 20 | response_deserializer=engines__pb2.Engines.FromString, 21 | ) 22 | 23 | 24 | class EnginesServiceServicer(object): 25 | """Missing associated documentation comment in .proto file.""" 26 | 27 | def ListEngines(self, request, context): 28 | """Missing associated documentation comment in .proto file.""" 29 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 30 | context.set_details('Method not implemented!') 31 | raise NotImplementedError('Method not implemented!') 32 | 33 | 34 | def add_EnginesServiceServicer_to_server(servicer, server): 35 | rpc_method_handlers = { 36 | 'ListEngines': grpc.unary_unary_rpc_method_handler( 37 | servicer.ListEngines, 38 | request_deserializer=engines__pb2.ListEnginesRequest.FromString, 39 | response_serializer=engines__pb2.Engines.SerializeToString, 40 | ), 41 | } 42 | generic_handler = grpc.method_handlers_generic_handler( 43 | 'gooseai.EnginesService', rpc_method_handlers) 44 | server.add_generic_rpc_handlers((generic_handler,)) 45 | 46 | 47 | # This class is part of an EXPERIMENTAL API. 48 | class EnginesService(object): 49 | """Missing associated documentation comment in .proto file.""" 50 | 51 | @staticmethod 52 | def ListEngines(request, 53 | target, 54 | options=(), 55 | channel_credentials=None, 56 | call_credentials=None, 57 | insecure=False, 58 | compression=None, 59 | wait_for_ready=None, 60 | timeout=None, 61 | metadata=None): 62 | return grpc.experimental.unary_unary(request, target, '/gooseai.EnginesService/ListEngines', 63 | engines__pb2.ListEnginesRequest.SerializeToString, 64 | engines__pb2.Engines.FromString, 65 | options, channel_credentials, 66 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 67 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/generation_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | 5 | import generation_pb2 as generation__pb2 6 | 7 | 8 | class GenerationServiceStub(object): 9 | """ 10 | gRPC services 11 | 12 | """ 13 | 14 | def __init__(self, channel): 15 | """Constructor. 16 | 17 | Args: 18 | channel: A grpc.Channel. 
19 | """ 20 | self.Generate = channel.unary_stream( 21 | '/gooseai.GenerationService/Generate', 22 | request_serializer=generation__pb2.Request.SerializeToString, 23 | response_deserializer=generation__pb2.Answer.FromString, 24 | ) 25 | self.ChainGenerate = channel.unary_stream( 26 | '/gooseai.GenerationService/ChainGenerate', 27 | request_serializer=generation__pb2.ChainRequest.SerializeToString, 28 | response_deserializer=generation__pb2.Answer.FromString, 29 | ) 30 | 31 | 32 | class GenerationServiceServicer(object): 33 | """ 34 | gRPC services 35 | 36 | """ 37 | 38 | def Generate(self, request, context): 39 | """Missing associated documentation comment in .proto file.""" 40 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 41 | context.set_details('Method not implemented!') 42 | raise NotImplementedError('Method not implemented!') 43 | 44 | def ChainGenerate(self, request, context): 45 | """Missing associated documentation comment in .proto file.""" 46 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 47 | context.set_details('Method not implemented!') 48 | raise NotImplementedError('Method not implemented!') 49 | 50 | 51 | def add_GenerationServiceServicer_to_server(servicer, server): 52 | rpc_method_handlers = { 53 | 'Generate': grpc.unary_stream_rpc_method_handler( 54 | servicer.Generate, 55 | request_deserializer=generation__pb2.Request.FromString, 56 | response_serializer=generation__pb2.Answer.SerializeToString, 57 | ), 58 | 'ChainGenerate': grpc.unary_stream_rpc_method_handler( 59 | servicer.ChainGenerate, 60 | request_deserializer=generation__pb2.ChainRequest.FromString, 61 | response_serializer=generation__pb2.Answer.SerializeToString, 62 | ), 63 | } 64 | generic_handler = grpc.method_handlers_generic_handler( 65 | 'gooseai.GenerationService', rpc_method_handlers) 66 | server.add_generic_rpc_handlers((generic_handler,)) 67 | 68 | 69 | # This class is part of an EXPERIMENTAL API. 70 | class GenerationService(object): 71 | """ 72 | gRPC services 73 | 74 | """ 75 | 76 | @staticmethod 77 | def Generate(request, 78 | target, 79 | options=(), 80 | channel_credentials=None, 81 | call_credentials=None, 82 | insecure=False, 83 | compression=None, 84 | wait_for_ready=None, 85 | timeout=None, 86 | metadata=None): 87 | return grpc.experimental.unary_stream(request, target, '/gooseai.GenerationService/Generate', 88 | generation__pb2.Request.SerializeToString, 89 | generation__pb2.Answer.FromString, 90 | options, channel_credentials, 91 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 92 | 93 | @staticmethod 94 | def ChainGenerate(request, 95 | target, 96 | options=(), 97 | channel_credentials=None, 98 | call_credentials=None, 99 | insecure=False, 100 | compression=None, 101 | wait_for_ready=None, 102 | timeout=None, 103 | metadata=None): 104 | return grpc.experimental.unary_stream(request, target, '/gooseai.GenerationService/ChainGenerate', 105 | generation__pb2.ChainRequest.SerializeToString, 106 | generation__pb2.Answer.FromString, 107 | options, channel_credentials, 108 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 109 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/tensors_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # source: tensors.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf.internal import builder as _builder 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import descriptor_pool as _descriptor_pool 8 | from google.protobuf import symbol_database as _symbol_database 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | 15 | 16 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rtensors.proto\x12\x07tensors\"\x82\x01\n\x06Tensor\x12\x1d\n\x05\x64type\x18\x01 \x01(\x0e\x32\x0e.tensors.Dtype\x12\r\n\x05shape\x18\x02 \x03(\x03\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12.\n\tattr_type\x18\x04 \x01(\x0e\x32\x16.tensors.AttributeTypeH\x00\x88\x01\x01\x42\x0c\n\n_attr_type\"\xac\x01\n\tAttribute\x12\x0c\n\x04name\x18\x01 \x01(\t\x12!\n\x06module\x18\x03 \x01(\x0b\x32\x0f.tensors.ModuleH\x00\x12!\n\x06tensor\x18\x04 \x01(\x0b\x32\x0f.tensors.TensorH\x00\x12\x10\n\x06string\x18\x05 \x01(\tH\x00\x12\x0f\n\x05int64\x18\x06 \x01(\x03H\x00\x12\x0f\n\x05\x66loat\x18\x07 \x01(\x02H\x00\x12\x0e\n\x04\x62ool\x18\x08 \x01(\x08H\x00\x42\x07\n\x05value\"M\n\x06Module\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05names\x18\x02 \x03(\t\x12&\n\nattributes\x18\x03 \x03(\x0b\x32\x12.tensors.Attribute*\x9e\x02\n\x05\x44type\x12\x0e\n\nDT_INVALID\x10\x00\x12\x0e\n\nDT_FLOAT32\x10\x01\x12\x0e\n\nDT_FLOAT64\x10\x02\x12\x0e\n\nDT_FLOAT16\x10\x03\x12\x0f\n\x0b\x44T_BFLOAT16\x10\x04\x12\x10\n\x0c\x44T_COMPLEX32\x10\x05\x12\x10\n\x0c\x44T_COMPLEX64\x10\x06\x12\x11\n\rDT_COMPLEX128\x10\x07\x12\x0c\n\x08\x44T_UINT8\x10\x08\x12\x0b\n\x07\x44T_INT8\x10\t\x12\x0c\n\x08\x44T_INT16\x10\n\x12\x0c\n\x08\x44T_INT32\x10\x0b\x12\x0c\n\x08\x44T_INT64\x10\x0c\x12\x0b\n\x07\x44T_BOOL\x10\r\x12\r\n\tDT_QUINT8\x10\x0e\x12\x0c\n\x08\x44T_QINT8\x10\x0f\x12\r\n\tDT_QINT32\x10\x10\x12\x0f\n\x0b\x44T_QUINT4_2\x10\x11*0\n\rAttributeType\x12\x10\n\x0c\x41T_PARAMETER\x10\x00\x12\r\n\tAT_BUFFER\x10\x01\x42)Z\'github.com/coreweave/tensorizer/tensorsb\x06proto3') 17 | 18 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) 19 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'tensors_pb2', globals()) 20 | if _descriptor._USE_C_DESCRIPTORS == False: 21 | 22 | DESCRIPTOR._options = None 23 | DESCRIPTOR._serialized_options = b'Z\'github.com/coreweave/tensorizer/tensors' 24 | _DTYPE._serialized_start=414 25 | _DTYPE._serialized_end=700 26 | _ATTRIBUTETYPE._serialized_start=702 27 | _ATTRIBUTETYPE._serialized_end=750 28 | _TENSOR._serialized_start=27 29 | _TENSOR._serialized_end=157 30 | _ATTRIBUTE._serialized_start=160 31 | _ATTRIBUTE._serialized_end=332 32 | _MODULE._serialized_start=334 33 | _MODULE._serialized_end=411 34 | # @@protoc_insertion_point(module_scope) 35 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/tensors_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | 5 | -------------------------------------------------------------------------------- /sdgrpcserver/images.py: -------------------------------------------------------------------------------- 1 | # Utility functions for handling images as PyTorch Tensors 2 | 3 | # All images in are in BCHW unless specified in the variable name, as floating point 0..1 4 | # All functions will handle RGB or RGBA images 5 | 6 | from math import ceil 7 | 8 | import cv2 as cv 9 | import numpy as np 10 | import PIL 11 | import torch 12 | import torchvision 13 | 14 | 15 | def fromPIL(image): 16 | # Get as numpy HWC 0..1 17 | rgbHWC = np.array(image).astype(np.float32) / 255.0 18 | # Convert to BCHW 19 | rgbBCHW = rgbHWC[None].transpose(0, 3, 1, 2) 20 | # And convert to Tensor 21 | return torch.from_numpy(rgbBCHW) 22 | 23 | 24 | def toPIL(tensor): 25 | # Convert to BCHW if just CHW 26 | if tensor.ndim == 3: 27 | tensor = tensor[None, ...] 28 | # Then convert to BHWC 29 | rgbBHWC = tensor.permute(0, 2, 3, 1) 30 | # Then convert from 0..1 to 0..255 31 | images = (rgbBHWC.to(torch.float32) * 255).round().to(torch.uint8).cpu().numpy() 32 | # And put into PIL image instances 33 | return [PIL.Image.fromarray(image) for image in images] 34 | 35 | 36 | def fromCV(bgrHWC): 37 | bgrBCHW = bgrHWC[None].transpose(0, 3, 1, 2) 38 | channels = [2, 1, 0, 3][bgrBCHW.shape[1]] 39 | return torch.from_numpy(bgrBCHW)[:, channels].to(torch.float32) / 255.0 40 | 41 | 42 | def toCV(tensor): 43 | if tensor.ndim == 3: 44 | tensor = tensor[None, ...] 45 | 46 | bgrBCHW = tensor[:, [2, 1, 0, 3][: tensor.shape[1]]] 47 | bgrBHWC = bgrBCHW.permute(0, 2, 3, 1) 48 | 49 | return (bgrBHWC.to(torch.float32) * 255).round().to(torch.uint8).cpu().numpy() 50 | 51 | 52 | def fromPngBytes(bytes): 53 | intensor = torch.tensor(np.frombuffer(bytes, dtype=np.uint8)) 54 | asuint8 = torchvision.io.decode_image( 55 | intensor, torchvision.io.image.ImageReadMode.RGB_ALPHA 56 | ) 57 | return asuint8[None, ...].to(torch.float32) / 255 58 | 59 | 60 | # Images with alpha will be slow for now. TODO: Move to OpenCV (torchvision does not support encoding alpha images) 61 | def toPngBytes(tensor): 62 | if tensor.ndim == 3: 63 | tensor = tensor[None, ...] 
64 | 65 | if tensor.shape[1] == 1 or tensor.shape[1] == 3: 66 | tensor = (tensor.to(torch.float32) * 255).round().to(torch.uint8) 67 | pngs = [torchvision.io.encode_png(image) for image in tensor] 68 | return [png.numpy().tobytes() for png in pngs] 69 | elif tensor.shape[1] == 4: 70 | images = toCV(tensor) 71 | return [cv.imencode(".png", image)[1].tobytes() for image in images] 72 | else: 73 | print(f"Don't know how to save PNGs with {tensor.shape[1]} channels") 74 | return [] 75 | 76 | 77 | # TODO: This won't work on images with alpha 78 | def levels(tensor, in0, in1, out0, out1): 79 | c = (out1 - out0) / (in1 - in0) 80 | return ((tensor - in0) * c + out0).clamp(0, 1) 81 | 82 | 83 | def invert(tensor): 84 | return 1 - tensor 85 | 86 | 87 | # 0, 1, 2, 3 = r, g, b, a | 4 = 0 | 5 = 1 | 6 = drop 88 | # TODO: These are from generation.proto, but we should be nicer about the mapping 89 | def channelmap(tensor, srcchannels): 90 | # Any that are 6 won't be in final output 91 | outchannels = [x for x in srcchannels if x != 6] 92 | # Any channel request that is higher than channels available, just use channel 0 93 | # (This also deals with channels we will later fill with zero or one) 94 | cpychannels = [x if x < tensor.shape[1] else 0 for x in outchannels] 95 | 96 | # Copy the desired source channel into place (or the first channel if we will replace in the next step) 97 | tensor = tensor[:, cpychannels] 98 | 99 | # Replace any channels with 0 or 1 if requested 100 | for i, c in enumerate(outchannels): 101 | if c == 4: 102 | tensor[:, i] = torch.zeros_like(tensor[0][i]) 103 | elif c == 5: 104 | tensor[:, i] = torch.ones_like(tensor[0][i]) 105 | 106 | return tensor 107 | 108 | 109 | def gaussianblur(tensor, sigma): 110 | if np.isscalar(sigma): 111 | sigma = (sigma, sigma) 112 | kernel = [ceil(sigma[0] * 6), ceil(sigma[1] * 6)] 113 | kernel = [kernel[0] - kernel[0] % 2 + 1, kernel[1] - kernel[1] % 2 + 1] 114 | return torchvision.transforms.functional.gaussian_blur(tensor, kernel, sigma) 115 | 116 | 117 | def crop(tensor, top, left, height, width): 118 | return tensor[:, :, top : top + height, left : left + width] 119 | -------------------------------------------------------------------------------- /sdgrpcserver/k_diffusion.py: -------------------------------------------------------------------------------- 1 | import os, sys, types 2 | 3 | module_path = os.path.join(os.path.dirname(__file__), "src/k-diffusion/k_diffusion") 4 | 5 | import importlib.util 6 | 7 | # We load the k_diffusion files directly rather than relying on Python modules 8 | # This allows us to only install the dependencies of the parts we use 9 | 10 | for name in ['utils', 'sampling', 'external']: 11 | module_name = f"{__name__}.{name}" 12 | file_path = os.path.join(module_path, f"{name}.py") 13 | 14 | # From https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly 15 | spec = importlib.util.spec_from_file_location(module_name, file_path) 16 | module = importlib.util.module_from_spec(spec) 17 | sys.modules[module_name] = module 18 | spec.loader.exec_module(module) 19 | -------------------------------------------------------------------------------- /sdgrpcserver/patching.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import inspect 3 | 4 | 5 | def patch_module_references(item, **patch): 6 | container_module = inspect.getmodule(item) 7 | 8 | # Handle the case of partial or other wrapped callables 9 | # (only for functools - other wrappers will
break this function) 10 | if container_module is functools: 11 | container_module = inspect.getmodule(item.func) 12 | 13 | for k, v in patch.items(): 14 | setattr(container_module, k, v) 15 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/sdgrpcserver/pipeline/__init__.py -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/attention_replacer.py: -------------------------------------------------------------------------------- 1 | # Originally from https://github.com/shunk031/training-free-structured-diffusion-guidance/blob/main/tfsdg/utils/replace_layer.py 2 | 3 | import inspect 4 | from typing import Type 5 | 6 | import torch.nn as nn 7 | from diffusers.models.attention import CrossAttention 8 | 9 | 10 | def replace_cross_attention( 11 | target: nn.Module, crossattention: Type[nn.Module], name: str 12 | ) -> None: 13 | for attr_str in dir(target): 14 | target_attr = getattr(target, attr_str) 15 | 16 | if isinstance(target_attr, CrossAttention): 17 | query_dim = target_attr.to_q.in_features 18 | assert target_attr.to_k.in_features == target_attr.to_v.in_features 19 | context_dim = target_attr.to_k.in_features 20 | heads = target_attr.heads 21 | dim_head = int(target_attr.scale**-2) 22 | dropout = target_attr.to_out[-1].p 23 | 24 | ca_kwargs = { 25 | "query_dim": query_dim, 26 | "context_dim": context_dim, 27 | "heads": heads, 28 | "dim_head": dim_head, 29 | "dropout": dropout, 30 | } 31 | 32 | accepts_struct_attention = "struct_attention" in set( 33 | inspect.signature(crossattention.__init__).parameters.keys() 34 | ) 35 | 36 | if accepts_struct_attention: 37 | ca_kwargs["struct_attention"] = attr_str == "attn2" 38 | 39 | ca = crossattention(**ca_kwargs) 40 | ca.to( 41 | device=target_attr.to_q.weight.device, 42 | dtype=target_attr.to_q.weight.dtype, 43 | ) 44 | 45 | original_params = list(target_attr.parameters()) 46 | proposed_params = list(ca.parameters()) 47 | assert len(original_params) == len(proposed_params) 48 | 49 | for p1, p2 in zip(original_params, proposed_params): 50 | p2.data.copy_(p1.data) 51 | 52 | setattr(target, attr_str, ca) 53 | 54 | for name, immediate_child_module in target.named_children(): 55 | replace_cross_attention( 56 | target=immediate_child_module, crossattention=crossattention, name=name 57 | ) 58 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/diffusers_types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class VaeConfig: 6 | block_out_channels: list[int] 7 | 8 | 9 | @dataclass 10 | class UnetConfig: 11 | sample_size: int | None 12 | attention_head_dim: int | list[int] 13 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/easing.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Type 2 | 3 | from easing_functions import easing 4 | 5 | EASING_TYPE = Literal[ 6 | "linear", "quad", "cubic", "quartic", "quintic", "sine", "circular", "expo" 7 | ] 8 | 9 | EASINGS: dict[EASING_TYPE, Type[easing.EasingBase]] = { 10 | "linear": easing.LinearInOut, 11 | "quad": 
easing.QuadEaseInOut, 12 | "cubic": easing.CubicEaseInOut, 13 | "quartic": easing.QuarticEaseInOut, 14 | "quintic": easing.QuinticEaseInOut, 15 | "sine": easing.SineEaseInOut, 16 | "circular": easing.CircularEaseInOut, 17 | "expo": easing.ExponentialEaseInOut, 18 | } 19 | 20 | 21 | class Easing: 22 | def __init__( 23 | self, 24 | floor: float, 25 | start: float, 26 | end: float, 27 | easing: EASING_TYPE | Type[easing.EasingBase], 28 | ): 29 | self.floor = floor 30 | self.start = start 31 | self.end = end 32 | 33 | if isinstance(easing, str): 34 | easing = EASINGS[easing] 35 | 36 | self.easing = easing( 37 | end=1 - floor, duration=1 - (start + end) # type: ignore - easing_functions takes floats just fine 38 | ) 39 | 40 | def interp(self, u: float): 41 | if u < self.start: 42 | return self.floor 43 | if u > 1 - self.end: 44 | return 1 45 | 46 | return self.floor + self.easing(u - self.start) 47 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .scheduling_utils import KSchedulerMixin 3 | from .scheduling_dpm2_ancestral_discrete import DPM2AncestralDiscreteScheduler 4 | from .scheduling_dpm2_discrete import DPM2DiscreteScheduler 5 | from .scheduling_euler_ancestral_discrete import EulerAncestralDiscreteScheduler 6 | from .scheduling_euler_discrete import EulerDiscreteScheduler 7 | from .scheduling_heun_discrete import HeunDiscreteScheduler 8 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/scheduling_dpm2_discrete.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, Tuple, Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from scipy import integrate 21 | 22 | from diffusers.configuration_utils import ConfigMixin, register_to_config 23 | from diffusers.schedulers.scheduling_utils import SchedulerOutput 24 | from .scheduling_utils import KSchedulerMixin 25 | 26 | 27 | class DPM2DiscreteScheduler(KSchedulerMixin, ConfigMixin): 28 | """ 29 | A sampler inspired by DPM-Solver-2 and Algorithm 2 from Karras et al. (2022). 30 | for discrete beta schedules. Based on the original k-diffusion implementation by 31 | Katherine Crowson: 32 | https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L119 33 | 34 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 35 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
36 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 37 | [`~ConfigMixin.from_config`] functions. 38 | 39 | Args: 40 | num_train_timesteps (`int`): number of diffusion steps used to train the model. 41 | beta_start (`float`): the starting `beta` value of inference. 42 | beta_end (`float`): the final `beta` value. 43 | beta_schedule (`str`): 44 | the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from 45 | `linear` or `scaled_linear`. 46 | trained_betas (`np.ndarray`, optional): 47 | option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. 48 | options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, 49 | `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. 50 | 51 | """ 52 | 53 | @register_to_config 54 | def __init__( 55 | self, 56 | num_train_timesteps: int = 1000, 57 | beta_start: float = 0.00085, #sensible defaults 58 | beta_end: float = 0.012, 59 | beta_schedule: str = "linear", 60 | trained_betas: Optional[np.ndarray] = None, 61 | ): 62 | if trained_betas is not None: 63 | self.betas = torch.from_numpy(trained_betas) 64 | elif beta_schedule == "linear": 65 | self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) 66 | elif beta_schedule == "scaled_linear": 67 | # this schedule is very specific to the latent diffusion model. 68 | self.betas = ( 69 | torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 70 | ) 71 | else: 72 | raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") 73 | 74 | self.alphas = 1.0 - self.betas 75 | self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) 76 | 77 | self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 78 | self.log_sigmas = self.sigmas.log() 79 | 80 | # setable values 81 | self.num_inference_steps = None 82 | self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) 83 | self.derivatives = [] 84 | 85 | def set_timesteps(self, num_inference_steps: int): 86 | """ 87 | Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. 88 | 89 | Args: 90 | num_inference_steps (`int`): 91 | the number of diffusion steps used when generating samples with a pre-trained model. 
92 | """ 93 | self.num_inference_steps = num_inference_steps 94 | timesteps = np.linspace(self.config.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) 95 | self.timesteps = torch.from_numpy(timesteps) 96 | 97 | low_idx = np.floor(timesteps).astype(int) 98 | high_idx = np.ceil(timesteps).astype(int) 99 | frac = np.mod(timesteps, 1.0) 100 | sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) 101 | sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] 102 | sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) 103 | self.sigmas = torch.from_numpy(sigmas) 104 | 105 | self.init_noise_sigma = self.sigmas[0] 106 | self.derivatives = [] 107 | 108 | def step( 109 | self, 110 | model_output: Union[torch.FloatTensor, np.ndarray], 111 | timestep: float, 112 | sample: Union[torch.FloatTensor, np.ndarray], 113 | s_churn: float = 0., 114 | s_tmin: float = 0., 115 | s_tmax: float = float('inf'), 116 | s_noise: float = 1., 117 | generator = None, 118 | noise_predictor = None, 119 | return_dict: bool = True, 120 | ) -> Union[SchedulerOutput, Tuple]: 121 | """ 122 | Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion 123 | process from the learned model outputs (most often the predicted noise). 124 | 125 | Args: 126 | model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. 127 | timestep (`int`): current discrete timestep in the diffusion chain. 128 | sample (`torch.FloatTensor` or `np.ndarray`): 129 | current instance of sample being created by diffusion process. 130 | s_churn (`float`) 131 | s_tmin (`float`) 132 | s_tmax (`float`) 133 | s_noise (`float`) 134 | return_dict (`bool`): option for returning tuple rather than SchedulerOutput class 135 | 136 | Returns: 137 | [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: 138 | [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When 139 | returning a tuple, the first element is the sample tensor. 140 | 141 | """ 142 | if not noise_predictor: print("Noise predictor not provided, result will not be correct.") 143 | 144 | index = self.t_to_index(timestep) 145 | 146 | sigma = self.sigmas[index] 147 | gamma = min(s_churn / (len(self.sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigma <= s_tmax else 0. 148 | eps = torch.randn(sample.size(), dtype=sample.dtype, layout=sample.layout, device=generator.device, generator=generator).to(sample.device) * s_noise 149 | sigma_hat = sigma * (gamma + 1) 150 | if gamma > 0: 151 | sample = sample + eps * (sigma_hat ** 2 - sigma ** 2) ** 0.5 152 | # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise 153 | pred_original_sample = sample - sigma_hat * model_output 154 | 155 | # 2. 
Convert to an ODE derivative 156 | derivative = (sample - pred_original_sample) / sigma_hat 157 | self.derivatives.append(derivative) 158 | 159 | if self.sigmas[index + 1] == 0: 160 | dt = self.sigmas[index + 1] - sigma_hat 161 | sample = sample + derivative * dt 162 | else: 163 | sigma_mid = sigma_hat.log().lerp(self.sigmas[index + 1].log(), 0.5).exp() 164 | dt_1 = sigma_mid - sigma_hat 165 | dt_2 = self.sigmas[index + 1] - sigma_hat 166 | sample_2 = sample + derivative * dt_1 167 | 168 | if noise_predictor: 169 | model_output_2 = noise_predictor(sample_2, self.sigma_to_t(sigma_mid)) 170 | pred_original_sample_2 = sample_2 - sigma_mid * model_output_2 171 | else: 172 | pred_original_sample_2 = sample_2 - sigma_mid * model_output 173 | 174 | derivative_2 = (sample_2 - pred_original_sample_2) / sigma_mid 175 | sample = sample + derivative_2 * dt_2 176 | 177 | prev_sample = sample 178 | 179 | if not return_dict: 180 | return (prev_sample,) 181 | 182 | return SchedulerOutput(prev_sample=prev_sample) 183 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/scheduling_euler_ancestral_discrete.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, Tuple, Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from scipy import integrate 21 | 22 | from diffusers.configuration_utils import ConfigMixin, register_to_config 23 | from diffusers.schedulers.scheduling_utils import SchedulerOutput 24 | from .scheduling_utils import KSchedulerMixin 25 | 26 | 27 | class EulerAncestralDiscreteScheduler(KSchedulerMixin, ConfigMixin): 28 | """ 29 | Ancestral sampling with Euler method steps. 30 | for discrete beta schedules. Based on the original k-diffusion implementation by 31 | Katherine Crowson: 32 | https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L72 33 | 34 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 35 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 36 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 37 | [`~ConfigMixin.from_config`] functions. 38 | 39 | Args: 40 | num_train_timesteps (`int`): number of diffusion steps used to train the model. 41 | beta_start (`float`): the starting `beta` value of inference. 42 | beta_end (`float`): the final `beta` value. 43 | beta_schedule (`str`): 44 | the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from 45 | `linear` or `scaled_linear`. 
46 | trained_betas (`np.ndarray`, optional): 47 | option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. 48 | options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, 49 | `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. 50 | 51 | """ 52 | 53 | @register_to_config 54 | def __init__( 55 | self, 56 | num_train_timesteps: int = 1000, 57 | beta_start: float = 0.00085, #sensible defaults 58 | beta_end: float = 0.012, 59 | beta_schedule: str = "linear", 60 | trained_betas: Optional[np.ndarray] = None, 61 | ): 62 | if trained_betas is not None: 63 | self.betas = torch.from_numpy(trained_betas) 64 | elif beta_schedule == "linear": 65 | self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) 66 | elif beta_schedule == "scaled_linear": 67 | # this schedule is very specific to the latent diffusion model. 68 | self.betas = ( 69 | torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 70 | ) 71 | else: 72 | raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") 73 | 74 | self.alphas = 1.0 - self.betas 75 | self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) 76 | 77 | self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 78 | self.log_sigmas = self.sigmas.log() 79 | 80 | # setable values 81 | self.num_inference_steps = None 82 | self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) 83 | self.derivatives = [] 84 | 85 | def set_timesteps(self, num_inference_steps: int): 86 | """ 87 | Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. 88 | 89 | Args: 90 | num_inference_steps (`int`): 91 | the number of diffusion steps used when generating samples with a pre-trained model. 92 | """ 93 | self.num_inference_steps = num_inference_steps 94 | timesteps = np.linspace(self.config.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) 95 | self.timesteps = torch.from_numpy(timesteps) 96 | 97 | low_idx = np.floor(timesteps).astype(int) 98 | high_idx = np.ceil(timesteps).astype(int) 99 | frac = np.mod(timesteps, 1.0) 100 | sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) 101 | sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] 102 | sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) 103 | self.sigmas = torch.from_numpy(sigmas) 104 | 105 | self.init_noise_sigma = self.sigmas[0] 106 | self.derivatives = [] 107 | 108 | def step( 109 | self, 110 | model_output: Union[torch.FloatTensor, np.ndarray], 111 | timestep: float, 112 | sample: Union[torch.FloatTensor, np.ndarray], 113 | generator = None, 114 | return_dict: bool = True, 115 | ) -> Union[SchedulerOutput, Tuple]: 116 | """ 117 | Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion 118 | process from the learned model outputs (most often the predicted noise). 119 | 120 | Args: 121 | model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. 122 | timestep (`int`): current discrete timestep in the diffusion chain. 123 | sample (`torch.FloatTensor` or `np.ndarray`): 124 | current instance of sample being created by diffusion process. 
125 | return_dict (`bool`): option for returning tuple rather than SchedulerOutput class 126 | 127 | Returns: 128 | [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: 129 | [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When 130 | returning a tuple, the first element is the sample tensor. 131 | 132 | """ 133 | index = self.t_to_index(timestep) 134 | 135 | sigma = self.sigmas[index] 136 | 137 | # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise 138 | pred_original_sample = sample - sigma * model_output 139 | sigma_from = self.sigmas[index] 140 | sigma_to = self.sigmas[index + 1] 141 | sigma_up = (sigma_to ** 2 * (sigma_from ** 2 - sigma_to ** 2) / sigma_from ** 2) ** 0.5 142 | sigma_down = (sigma_to ** 2 - sigma_up ** 2) ** 0.5 143 | # 2. Convert to an ODE derivative 144 | derivative = (sample - pred_original_sample) / sigma 145 | self.derivatives.append(derivative) 146 | 147 | dt = sigma_down - sigma 148 | 149 | prev_sample = sample + derivative * dt 150 | 151 | noise = torch.randn(prev_sample.size(), dtype=prev_sample.dtype, layout=prev_sample.layout, device=generator.device, generator=generator).to(prev_sample.device) 152 | prev_sample = prev_sample + noise * sigma_up 153 | 154 | if not return_dict: 155 | return (prev_sample,) 156 | 157 | return SchedulerOutput(prev_sample=prev_sample) 158 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/scheduling_euler_discrete.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, Tuple, Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from scipy import integrate 21 | 22 | from diffusers.configuration_utils import ConfigMixin, register_to_config 23 | from diffusers.schedulers.scheduling_utils import SchedulerOutput 24 | from .scheduling_utils import KSchedulerMixin 25 | 26 | 27 | class EulerDiscreteScheduler(KSchedulerMixin, ConfigMixin): 28 | """ 29 | Implements Algorithm 2 (Euler steps) from Karras et al. (2022). 30 | for discrete beta schedules. Based on the original k-diffusion implementation by 31 | Katherine Crowson: 32 | https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L51 33 | 34 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 35 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 36 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 37 | [`~ConfigMixin.from_config`] functions. 38 | 39 | Args: 40 | num_train_timesteps (`int`): number of diffusion steps used to train the model. 
41 | beta_start (`float`): the starting `beta` value of inference. 42 | beta_end (`float`): the final `beta` value. 43 | beta_schedule (`str`): 44 | the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from 45 | `linear` or `scaled_linear`. 46 | trained_betas (`np.ndarray`, optional): 47 | option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. 48 | options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, 49 | `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. 50 | 51 | """ 52 | 53 | @register_to_config 54 | def __init__( 55 | self, 56 | num_train_timesteps: int = 1000, 57 | beta_start: float = 0.00085, #sensible defaults 58 | beta_end: float = 0.012, 59 | beta_schedule: str = "linear", 60 | trained_betas: Optional[np.ndarray] = None, 61 | ): 62 | if trained_betas is not None: 63 | self.betas = torch.from_numpy(trained_betas) 64 | elif beta_schedule == "linear": 65 | self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) 66 | elif beta_schedule == "scaled_linear": 67 | # this schedule is very specific to the latent diffusion model. 68 | self.betas = ( 69 | torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 70 | ) 71 | else: 72 | raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") 73 | 74 | self.alphas = 1.0 - self.betas 75 | self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) 76 | 77 | self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 78 | self.log_sigmas = self.sigmas.log() 79 | 80 | # setable values 81 | self.num_inference_steps = None 82 | self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) 83 | self.derivatives = [] 84 | 85 | def set_timesteps(self, num_inference_steps: int): 86 | """ 87 | Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. 88 | 89 | Args: 90 | num_inference_steps (`int`): 91 | the number of diffusion steps used when generating samples with a pre-trained model. 92 | """ 93 | self.num_inference_steps = num_inference_steps 94 | timesteps = np.linspace(self.config.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) 95 | self.timesteps = torch.from_numpy(timesteps) 96 | 97 | low_idx = np.floor(timesteps).astype(int) 98 | high_idx = np.ceil(timesteps).astype(int) 99 | frac = np.mod(timesteps, 1.0) 100 | sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) 101 | sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] 102 | sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) 103 | self.sigmas = torch.from_numpy(sigmas) 104 | 105 | self.init_noise_sigma = self.sigmas[0] 106 | self.derivatives = [] 107 | 108 | def step( 109 | self, 110 | model_output: Union[torch.FloatTensor, np.ndarray], 111 | timestep: int, 112 | sample: Union[torch.FloatTensor, np.ndarray], 113 | s_churn: float = 0., 114 | s_tmin: float = 0., 115 | s_tmax: float = float('inf'), 116 | s_noise: float = 1., 117 | generator = None, 118 | return_dict: bool = True, 119 | ) -> Union[SchedulerOutput, Tuple]: 120 | """ 121 | Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion 122 | process from the learned model outputs (most often the predicted noise). 
123 | 124 | Args: 125 | model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. 126 | timestep (`int`): current discrete timestep in the diffusion chain. 127 | sample (`torch.FloatTensor` or `np.ndarray`): 128 | current instance of sample being created by diffusion process. 129 | s_churn (`float`) 130 | s_tmin (`float`) 131 | s_tmax (`float`) 132 | s_noise (`float`) 133 | return_dict (`bool`): option for returning tuple rather than SchedulerOutput class 134 | 135 | Returns: 136 | [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: 137 | [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When 138 | returning a tuple, the first element is the sample tensor. 139 | 140 | """ 141 | index = self.t_to_index(timestep) 142 | 143 | sigma = self.sigmas[index] 144 | gamma = min(s_churn / (len(self.sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigma <= s_tmax else 0. 145 | eps = torch.randn(sample.size(), dtype=sample.dtype, layout=sample.layout, device=generator.device, generator=generator).to(sample.device) * s_noise 146 | sigma_hat = sigma * (gamma + 1) 147 | if gamma > 0: 148 | sample = sample + eps * (sigma_hat ** 2 - sigma ** 2) ** 0.5 149 | # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise 150 | pred_original_sample = sample - sigma_hat * model_output 151 | 152 | # 2. Convert to an ODE derivative 153 | derivative = (sample - pred_original_sample) / sigma_hat 154 | self.derivatives.append(derivative) 155 | 156 | dt = self.sigmas[index + 1] - sigma_hat 157 | 158 | prev_sample = sample + derivative * dt 159 | 160 | if not return_dict: 161 | return (prev_sample,) 162 | 163 | return SchedulerOutput(prev_sample=prev_sample) 164 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/scheduling_heun_discrete.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, Tuple, Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from scipy import integrate 21 | 22 | from diffusers.configuration_utils import ConfigMixin, register_to_config 23 | from diffusers.schedulers.scheduling_utils import SchedulerOutput 24 | from .scheduling_utils import KSchedulerMixin 25 | 26 | 27 | class HeunDiscreteScheduler(KSchedulerMixin, ConfigMixin): 28 | """ 29 | Implements Algorithm 2 (Heun steps) from Karras et al. (2022). 30 | for discrete beta schedules. 
Based on the original k-diffusion implementation by 31 | Katherine Crowson: 32 | https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L90 33 | 34 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 35 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 36 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 37 | [`~ConfigMixin.from_config`] functions. 38 | 39 | Args: 40 | num_train_timesteps (`int`): number of diffusion steps used to train the model. 41 | beta_start (`float`): the starting `beta` value of inference. 42 | beta_end (`float`): the final `beta` value. 43 | beta_schedule (`str`): 44 | the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from 45 | `linear` or `scaled_linear`. 46 | trained_betas (`np.ndarray`, optional): 47 | option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. 48 | options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, 49 | `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. 50 | 51 | """ 52 | 53 | @register_to_config 54 | def __init__( 55 | self, 56 | num_train_timesteps: int = 1000, 57 | beta_start: float = 0.00085, #sensible defaults 58 | beta_end: float = 0.012, 59 | beta_schedule: str = "linear", 60 | trained_betas: Optional[np.ndarray] = None, 61 | ): 62 | if trained_betas is not None: 63 | self.betas = torch.from_numpy(trained_betas) 64 | elif beta_schedule == "linear": 65 | self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) 66 | elif beta_schedule == "scaled_linear": 67 | # this schedule is very specific to the latent diffusion model. 68 | self.betas = ( 69 | torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 70 | ) 71 | else: 72 | raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") 73 | 74 | self.alphas = 1.0 - self.betas 75 | self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) 76 | 77 | self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 78 | self.log_sigmas = self.sigmas.log() 79 | 80 | # setable values 81 | self.num_inference_steps = None 82 | self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) 83 | self.derivatives = [] 84 | 85 | def set_timesteps(self, num_inference_steps: int): 86 | """ 87 | Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. 88 | 89 | Args: 90 | num_inference_steps (`int`): 91 | the number of diffusion steps used when generating samples with a pre-trained model. 
92 | """ 93 | self.num_inference_steps = num_inference_steps 94 | timesteps = np.linspace(self.config.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) 95 | self.timesteps = torch.from_numpy(timesteps) 96 | 97 | low_idx = np.floor(timesteps).astype(int) 98 | high_idx = np.ceil(timesteps).astype(int) 99 | frac = np.mod(timesteps, 1.0) 100 | sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) 101 | sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] 102 | sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) 103 | self.sigmas = torch.from_numpy(sigmas) 104 | 105 | self.init_noise_sigma = self.sigmas[0] 106 | self.derivatives = [] 107 | 108 | def step( 109 | self, 110 | model_output: Union[torch.FloatTensor, np.ndarray], 111 | timestep: int, 112 | sample: Union[torch.FloatTensor, np.ndarray], 113 | s_churn: float = 0., 114 | s_tmin: float = 0., 115 | s_tmax: float = float('inf'), 116 | s_noise: float = 1., 117 | generator = None, 118 | noise_predictor = None, 119 | return_dict: bool = True, 120 | ) -> Union[SchedulerOutput, Tuple]: 121 | """ 122 | Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion 123 | process from the learned model outputs (most often the predicted noise). 124 | 125 | Args: 126 | model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. 127 | timestep (`int`): current discrete timestep in the diffusion chain. 128 | sample (`torch.FloatTensor` or `np.ndarray`): 129 | current instance of sample being created by diffusion process. 130 | s_churn (`float`) 131 | s_tmin (`float`) 132 | s_tmax (`float`) 133 | s_noise (`float`) 134 | return_dict (`bool`): option for returning tuple rather than SchedulerOutput class 135 | 136 | Returns: 137 | [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: 138 | [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When 139 | returning a tuple, the first element is the sample tensor. 140 | 141 | """ 142 | if not noise_predictor: print("Noise predictor not provided, result will not be correct.") 143 | 144 | index = self.t_to_index(timestep) 145 | 146 | sigma = self.sigmas[index] 147 | gamma = min(s_churn / (len(self.sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigma <= s_tmax else 0. 148 | eps = torch.randn(sample.size(), dtype=sample.dtype, layout=sample.layout, device=generator.device, generator=generator).to(sample.device) * s_noise 149 | sigma_hat = sigma * (gamma + 1) 150 | if gamma > 0: 151 | sample = sample + eps * (sigma_hat ** 2 - sigma ** 2) ** 0.5 152 | # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise 153 | pred_original_sample = sample - sigma_hat * model_output 154 | 155 | # 2. 
Convert to an ODE derivative 156 | derivative = (sample - pred_original_sample) / sigma_hat 157 | self.derivatives.append(derivative) 158 | 159 | dt = self.sigmas[index + 1] - sigma_hat 160 | if self.sigmas[index + 1] == 0: 161 | # Euler method 162 | sample = sample + derivative * dt 163 | else: 164 | # Heun's method 165 | sample_2 = sample + derivative * dt 166 | 167 | if noise_predictor: 168 | model_output_2 = noise_predictor(sample_2, self.timesteps[index + 1]) 169 | pred_original_sample_2 = sample_2 - self.sigmas[index + 1] * model_output_2 170 | else: 171 | pred_original_sample_2 = sample_2 - self.sigmas[index + 1] * model_output 172 | 173 | derivative_2 = (sample_2 - pred_original_sample_2) / self.sigmas[index + 1] 174 | d_prime = (derivative + derivative_2) / 2 175 | sample = sample + d_prime * dt 176 | 177 | prev_sample = sample 178 | 179 | if not return_dict: 180 | return (prev_sample,) 181 | 182 | return SchedulerOutput(prev_sample=prev_sample) 183 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/scheduling_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass 15 | from typing import Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | SCHEDULER_CONFIG_NAME = "scheduler_config.json" 21 | 22 | class KSchedulerMixin: 23 | """ 24 | Mixin containing common functions for the schedulers. 25 | """ 26 | 27 | config_name = SCHEDULER_CONFIG_NAME 28 | ignore_for_config = ["tensor_format"] 29 | 30 | def match_shape(self, values: Union[np.ndarray, torch.Tensor], broadcast_array: Union[np.ndarray, torch.Tensor]): 31 | """ 32 | Turns a 1-D array into an array or tensor with len(broadcast_array.shape) dims. 33 | 34 | Args: 35 | values: an array or tensor of values to extract. 36 | broadcast_array: an array with a larger shape of K dimensions with the batch 37 | dimension equal to the length of timesteps. 38 | Returns: 39 | a tensor of shape [batch_size, 1, ...] where the shape has K dims. 40 | """ 41 | 42 | values = values.flatten() 43 | 44 | while len(values.shape) < len(broadcast_array.shape): 45 | values = values[..., None] 46 | 47 | values = values.to(broadcast_array.device) 48 | 49 | return values 50 | 51 | """ 52 | All the K-Schedulers handle these methods in the same way 53 | """ 54 | 55 | def scale_model_input( 56 | self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor] 57 | ) -> torch.FloatTensor: 58 | """ 59 | Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the K-LMS algorithm. 
60 | 61 | Args: 62 | sample (`torch.FloatTensor`): input sample 63 | timestep (`float` or `torch.FloatTensor`): the current timestep in the diffusion chain 64 | 65 | Returns: 66 | `torch.FloatTensor`: scaled input sample 67 | """ 68 | sigma = self.t_to_sigma(timestep) 69 | sample = sample / ((sigma**2 + 1) ** 0.5) 70 | return sample 71 | 72 | def add_noise( 73 | self, 74 | original_samples: Union[torch.FloatTensor, np.ndarray], 75 | noise: Union[torch.FloatTensor, np.ndarray], 76 | timesteps: Union[float, torch.FloatTensor], 77 | ) -> Union[torch.FloatTensor, np.ndarray]: 78 | index = self.t_to_index(timesteps) 79 | sigmas = self.match_shape(self.sigmas[index], noise) 80 | noisy_samples = original_samples + noise * sigmas 81 | 82 | return noisy_samples 83 | 84 | def __len__(self): 85 | return self.config.num_train_timesteps 86 | 87 | """ 88 | Taken from https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/external.py 89 | 90 | These assume that: 91 | len(self.timesteps) is num_inference_steps (not num_train_timesteps) 92 | len(self.sigmas) is num_inference_steps (not num_train_timesteps) 93 | 94 | BUT 95 | 96 | len(self.log_sigmas) is num_train_timesteps (not num_inference_steps) 97 | """ 98 | 99 | def t_to_index(self, timestep): 100 | self.timesteps = self.timesteps.to(timestep.device) 101 | 102 | dists = timestep - self.timesteps 103 | return dists.abs().argmin().item() 104 | 105 | def sigma_to_t(self, sigma, quantize=True): 106 | self.log_sigmas = self.log_sigmas.to(sigma.device) 107 | 108 | log_sigma = sigma.log() 109 | dists = log_sigma - self.log_sigmas[:, None] 110 | # Stable Diffusion should be quantized 111 | if quantize: 112 | return dists.abs().argmin(dim=0).view(sigma.shape) 113 | # For continuous distributions 114 | low_idx = dists.ge(0).cumsum(dim=0).argmax(dim=0).clamp(max=self.log_sigmas.shape[0] - 2) 115 | high_idx = low_idx + 1 116 | low, high = self.log_sigmas[low_idx], self.log_sigmas[high_idx] 117 | w = (low - log_sigma) / (low - high) 118 | w = w.clamp(0, 1) 119 | t = (1 - w) * low_idx + w * high_idx 120 | return t.view(sigma.shape) 121 | 122 | def t_to_sigma(self, t): 123 | t = t.float() 124 | low_idx, high_idx, w = t.floor().long(), t.ceil().long(), t.frac() 125 | log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] 126 | return log_sigma.exp() 127 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/latent_debugger.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from sdgrpcserver import images 4 | 5 | DEFAULT_ENABLED = set( 6 | [ 7 | # "initial", 8 | # "step", 9 | # "mask", 10 | # "shapednoise", 11 | # "initnoise", 12 | # "blendin", 13 | # "blendout", 14 | # "small", 15 | # "hires_lo", 16 | # "hires_hi", 17 | ] 18 | ) 19 | 20 | DEFAULT_OUTPUT_PATH = "/tests/debug-out/" 21 | 22 | 23 | class LatentDebugger: 24 | def __init__(self, vae, output_path=DEFAULT_OUTPUT_PATH, enabled=None, prefix=""): 25 | self.vae = vae 26 | self.output_path = output_path 27 | self.enabled = enabled if enabled is not None else DEFAULT_ENABLED 28 | self.prefix = prefix 29 | 30 | self.counters = {} 31 | 32 | def log(self, label, i, latents): 33 | if label not in self.enabled: 34 | return 35 | 36 | prefix = "debug" if not self.prefix else f"debug-{self.prefix}" 37 | 38 | self.counters[label] = i = self.counters.get(label, 0) + 1 39 | 40 | stage_latents = 1 / 0.18215 * latents 41 | stage_image = self.vae.decode(stage_latents).sample 42 | 
stage_image = (stage_image / 2 + 0.5).clamp(0, 1).cpu() 43 | 44 | for j, pngBytes in enumerate(images.toPngBytes(stage_image)): 45 | path = os.path.join(self.output_path, f"{prefix}-{label}-{j}-{i}.png") 46 | with open(path, "wb") as f: 47 | f.write(pngBytes) 48 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/model_utils.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from typing import Literal 3 | 4 | import torch 5 | from accelerate.hooks import ModelHook, add_hook_to_module 6 | from accelerate.utils import send_to_device, set_module_tensor_to_device 7 | 8 | 9 | class CloneToGPUHook(ModelHook): 10 | def __init__(self, execution_device, exclusion_set, top, params, buffers): 11 | self.execution_device = execution_device 12 | self.exclusion_set = exclusion_set 13 | self.top = top 14 | self.params = params 15 | self.buffers = buffers 16 | 17 | def pre_forward(self, module, *args, **kwargs): 18 | if self.exclusion_set: 19 | self.exclusion_set.activate(self.top) 20 | 21 | dev = self.execution_device 22 | 23 | for name, param in module.named_parameters(recurse=False): 24 | if param.device == torch.device("meta"): 25 | # explicitly copy, as set_module_tensor_to_device won't create 26 | # a copy if the device is already correct 27 | new_param = self.params[name].to(dev, copy=True) 28 | set_module_tensor_to_device(module, name, dev, new_param) 29 | 30 | for name, buffer in module.named_buffers(recurse=False): 31 | if buffer.device == torch.device("meta"): 32 | new_buffer = self.buffers[name].to(dev, copy=True) 33 | set_module_tensor_to_device(module, name, dev, new_buffer) 34 | 35 | return ( 36 | send_to_device(args, dev), 37 | send_to_device(kwargs, dev), 38 | ) 39 | 40 | def reset(self, model): 41 | for name in self.params.keys(): 42 | set_module_tensor_to_device(model, name, "meta") 43 | for name in self.buffers.keys(): 44 | set_module_tensor_to_device(model, name, "meta") 45 | 46 | 47 | class GPUExclusionSet: 48 | def __init__(self, max_activated=-1): 49 | self.sets = [] 50 | self.activated = [] 51 | self.max_activated = max_activated 52 | 53 | def add(self, top): 54 | models = [ 55 | model 56 | for _, model in top.named_modules() 57 | if hasattr(model, "_hf_hook") and isinstance(model._hf_hook, CloneToGPUHook) 58 | ] 59 | 60 | self.sets.append((top, models)) 61 | 62 | def reset(self, exclude=[]): 63 | exclude = list(exclude) 64 | 65 | for top, models in self.sets: 66 | if top in exclude: 67 | continue 68 | 69 | for model in models: 70 | model._hf_hook.reset(model) 71 | 72 | def activate(self, top): 73 | # No-op if top is already the most recently activated 74 | if self.activated and self.activated[0] is top: 75 | return 76 | 77 | # Update the LRU activated queue 78 | self.activated = [model for model in self.activated if model is not top] 79 | self.activated.insert(0, top) 80 | self.activated = self.activated[: self.max_activated] 81 | 82 | self.reset(exclude=self.activated) 83 | 84 | 85 | def clone_model( 86 | model, 87 | clone_tensors: Literal["share"] | str | torch.device = "share", 88 | exclusion_set=None, 89 | ): 90 | """ 91 | Copies a model so you get a different set of instances, but they share 92 | all their parameters and buffers 93 | """ 94 | 95 | # If this isn't actually a model, just return a deepcopy 96 | if not isinstance(model, torch.nn.Module): 97 | clone = deepcopy(model) 98 | if clone_tensors != "share": 99 | clone = clone.to(clone_tensors) 100 | 
return clone 101 | 102 | # Start by pulling all the Tensors out of the model, so they're not copied on deepclone 103 | cache = {} 104 | 105 | for (model_name, source) in model.named_modules(): 106 | model_params = {} 107 | model_buffers = {} 108 | 109 | for name, param in source.named_parameters(recurse=False): 110 | model_params[name] = param 111 | source._parameters[name] = None 112 | 113 | for name, buffer in source.named_buffers(recurse=False): 114 | model_buffers[name] = buffer 115 | source._buffers[name] = None 116 | 117 | cache[model_name] = (model_params, model_buffers) 118 | 119 | # Deep clone the model 120 | clone = deepcopy(model) 121 | 122 | # Put the tensors back into the model 123 | for (model_name, dest) in model.named_modules(): 124 | model_params, model_buffers = cache[model_name] 125 | 126 | for name, param in model_params.items(): 127 | dest._parameters[name] = param 128 | for name, buffer in model_buffers.items(): 129 | dest._buffers[name] = buffer 130 | 131 | # And into the clone 132 | # Even if we're not sharing, set it to shared to start with 133 | for (model_name, dest) in clone.named_modules(): 134 | model_params, model_buffers = cache[model_name] 135 | 136 | for name, param in model_params.items(): 137 | dest.register_parameter(name, param) 138 | for name, buffer in model_buffers.items(): 139 | dest.register_buffer(name, buffer) 140 | 141 | if clone_tensors != "share": 142 | if exclusion_set: 143 | exclusion_set.add(clone) 144 | 145 | for (model_name, dest) in clone.named_modules(): 146 | model_params, model_buffers = cache[model_name] 147 | 148 | if exclusion_set: 149 | for name in model_params.keys(): 150 | set_module_tensor_to_device(dest, name, "meta") 151 | for name in model_buffers.keys(): 152 | set_module_tensor_to_device(dest, name, "meta") 153 | 154 | add_hook_to_module( 155 | dest, 156 | CloneToGPUHook( 157 | clone_tensors, exclusion_set, clone, model_params, model_buffers 158 | ), 159 | ) 160 | else: 161 | for name, param in model_params.items(): 162 | new_param = param.to(clone_tensors, copy=True) 163 | set_module_tensor_to_device(dest, name, clone_tensors, new_param) 164 | for name, buffer in model_buffers.items(): 165 | new_buffer = buffer.to(clone_tensors, copy=True) 166 | set_module_tensor_to_device(dest, name, clone_tensors, new_buffer) 167 | 168 | return clone 169 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/sdgrpcserver/pipeline/models/__init__.py -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/models/memory_efficient_cross_attention.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | from torch import nn 4 | 5 | try: 6 | import xformers 7 | import xformers.ops 8 | except: 9 | xformers = None 10 | 11 | def has_xformers(): 12 | return xformers is not None 13 | 14 | # From https://github.com/huggingface/diffusers/pull/532 15 | 16 | class MemoryEfficientCrossAttention(nn.Module): 17 | def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0): 18 | super().__init__() 19 | inner_dim = dim_head * heads 20 | context_dim = context_dim if context_dim is not None else query_dim 21 | 22 | self.heads = heads 23 | self.dim_head 
= dim_head 24 | 25 | self.to_q = nn.Linear(query_dim, inner_dim, bias=False) 26 | self.to_k = nn.Linear(context_dim, inner_dim, bias=False) 27 | self.to_v = nn.Linear(context_dim, inner_dim, bias=False) 28 | 29 | self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) 30 | self.attention_op: Optional[Any] = None 31 | 32 | def forward(self, x, context=None, mask=None): 33 | q = self.to_q(x) 34 | context = context if context is not None else x 35 | k = self.to_k(context) 36 | v = self.to_v(context) 37 | 38 | b, _, _ = q.shape 39 | q, k, v = map( 40 | lambda t: t.unsqueeze(3) 41 | .reshape(b, t.shape[1], self.heads, self.dim_head) 42 | .permute(0, 2, 1, 3) 43 | .reshape(b * self.heads, t.shape[1], self.dim_head) 44 | .contiguous(), 45 | (q, k, v), 46 | ) 47 | 48 | # actually compute the attention, what we cannot get enough of 49 | out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None, op=self.attention_op) 50 | 51 | # TODO: Use this directly in the attention operation, as a bias 52 | if mask is not None: 53 | raise NotImplementedError 54 | out = ( 55 | out.unsqueeze(0) 56 | .reshape(b, self.heads, out.shape[1], self.dim_head) 57 | .permute(0, 2, 1, 3) 58 | .reshape(b, out.shape[1], self.heads * self.dim_head) 59 | ) 60 | return self.to_out(out) 61 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/models/structured_cross_attention.py: -------------------------------------------------------------------------------- 1 | # Mostly from https://github.com/shunk031/training-free-structured-diffusion-guidance 2 | # 3 | # Changes: 4 | # - _attention changed to _sliced_attention to match Diffusers new(?) argument structure 5 | 6 | 7 | from typing import Optional, Tuple 8 | 9 | import torch as th 10 | from diffusers.models.attention import CrossAttention 11 | 12 | from sdgrpcserver.pipeline.text_embedding.structured_text_embedding import KeyValueTensors 13 | 14 | from einops.layers.torch import Reduce 15 | 16 | class StructuredCrossAttention(CrossAttention): 17 | def __init__( 18 | self, 19 | query_dim: int, 20 | context_dim: Optional[int] = None, 21 | heads: int = 8, 22 | dim_head: int = 64, 23 | dropout: int = 0, 24 | struct_attention: bool = False, 25 | ) -> None: 26 | super().__init__(query_dim, context_dim, heads, dim_head, dropout) 27 | self.struct_attention = struct_attention 28 | 29 | self.max_pooling_layer = Reduce(f"b c h w -> 1 c h w", 'max') 30 | 31 | 32 | def struct_qkv( 33 | self, 34 | q: th.Tensor, 35 | context: Tuple[th.Tensor, KeyValueTensors], 36 | mask: Optional[th.Tensor] = None, 37 | ) -> th.Tensor: 38 | 39 | assert len(context) == 2 and isinstance(context, tuple) 40 | uc_context = context[0] 41 | context_k = context[1].k 42 | context_v = context[1].v 43 | 44 | if isinstance(context_k, list) and isinstance(context_v, list): 45 | return self.multi_qkv( 46 | q=q, 47 | uc_context=uc_context, 48 | context_k=context_k, 49 | context_v=context_v, 50 | mask=mask, 51 | ) 52 | elif isinstance(context_k, th.Tensor) and isinstance(context_v, th.Tensor): 53 | return self.heterogenous_qkv( 54 | q=q, 55 | uc_context=uc_context, 56 | context_k=context_k, 57 | context_v=context_v, 58 | mask=mask, 59 | ) 60 | else: 61 | raise NotImplementedError 62 | 63 | def multi_qkv( 64 | self, 65 | q: th.Tensor, 66 | uc_context: th.Tensor, 67 | context_k: th.Tensor, 68 | context_v: th.Tensor, 69 | mask: Optional[th.Tensor] = None, 70 | ) -> None: 71 | h = self.heads 72 | assert uc_context.size(0) == 
context_k[0].size(0) == context_v[0].size(0) 73 | true_bs = uc_context.size(0)*h 74 | 75 | k_uc = self.to_k(uc_context) 76 | v_uc = self.to_v(uc_context) 77 | 78 | k_c = [self.to_k(c_k) for c_k in context_k] 79 | v_c = [self.to_v(c_v) for c_v in context_v] 80 | 81 | q = self.reshape_heads_to_batch_dim(q) 82 | k_uc = self.reshape_heads_to_batch_dim(k_uc) 83 | v_uc = self.reshape_heads_to_batch_dim(v_uc) 84 | 85 | k_c = [self.reshape_heads_to_batch_dim(k) for k in k_c] 86 | v_c = [self.reshape_heads_to_batch_dim(v) for v in v_c] 87 | 88 | q_uc = q[:true_bs] 89 | q_c = q[true_bs:] 90 | 91 | sim_uc = th.matmul(q_uc, k_uc.transpose(-1, -2)) * self.scale 92 | sim_c = [th.matmul(q_c, k.transpose(-1, -2)) * self.scale for k in k_c] 93 | 94 | attn_uc = sim_uc.softmax(dim=-1) 95 | attn_c = [sim.softmax(dim=-1) for sim in sim_c] 96 | 97 | out_uc = th.matmul(attn_uc, v_uc) 98 | out_c = [th.matmul(attn, v) for attn, v in zip(attn_c, v_c)] 99 | 100 | out_c = sum(out_c) / len(v_c) 101 | 102 | out = th.cat([out_uc, out_c]) 103 | 104 | return self.reshape_batch_dim_to_heads(out) 105 | 106 | def normal_qkv( 107 | self, 108 | q: th.Tensor, 109 | context: th.Tensor, 110 | mask: Optional[th.Tensor] = None, 111 | ) -> th.Tensor: 112 | 113 | batch_size, sequence_length, dim = q.shape 114 | 115 | k = self.to_k(context) 116 | v = self.to_v(context) 117 | 118 | q = self.reshape_heads_to_batch_dim(q) 119 | k = self.reshape_heads_to_batch_dim(k) 120 | v = self.reshape_heads_to_batch_dim(v) 121 | 122 | hidden_states = self._sliced_attention(q, k, v, sequence_length, dim) 123 | 124 | return hidden_states 125 | 126 | def heterogenous_qkv( 127 | self, 128 | q: th.Tensor, 129 | uc_context: th.Tensor, 130 | context_k: th.Tensor, 131 | context_v: th.Tensor, 132 | mask: Optional[th.Tensor] = None, 133 | ) -> th.Tensor: 134 | 135 | batch_size, sequence_length, dim = q.shape 136 | 137 | k = self.to_k(th.cat((uc_context, context_k), dim=0)) 138 | v = self.to_v(th.cat((uc_context, context_v), dim=0)) 139 | 140 | q = self.reshape_heads_to_batch_dim(q) 141 | k = self.reshape_heads_to_batch_dim(k) 142 | v = self.reshape_heads_to_batch_dim(v) 143 | 144 | hidden_states = self._sliced_attention(q, k, v, sequence_length, dim) 145 | 146 | return hidden_states 147 | 148 | def get_kv(self, context: th.Tensor) -> KeyValueTensors: 149 | return KeyValueTensors(k=self.to_k(context), v=self.to_v(context)) 150 | 151 | def forward( 152 | self, 153 | x: th.Tensor, 154 | context: Optional[Tuple[th.Tensor, KeyValueTensors]] = None, 155 | mask: Optional[th.Tensor] = None, 156 | ) -> th.Tensor: 157 | 158 | q = self.to_q(x) 159 | 160 | if isinstance(context, tuple): 161 | assert len(context) == 2 162 | assert isinstance(context[0], th.Tensor) # unconditioned embedding 163 | assert isinstance(context[1], KeyValueTensors) # conditioned embedding 164 | 165 | if self.struct_attention: 166 | out = self.struct_qkv(q=q, context=context, mask=mask) 167 | else: 168 | uc_context = context[0] 169 | c_full_seq = context[1].k[0].unsqueeze(dim=0) 170 | print("n", c_full_seq.shape) 171 | out = self.normal_qkv( 172 | q=q, context=th.cat((uc_context, c_full_seq), dim=0), mask=mask 173 | ) 174 | else: 175 | ctx = context if context is not None else x 176 | out = self.normal_qkv(q=q, context=ctx, mask=mask) 177 | 178 | return self.to_out(out) 179 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/randtools.py: -------------------------------------------------------------------------------- 1 | from typing import 
List, Optional, Sequence 2 | 3 | import torch 4 | 5 | 6 | def batched_rand( 7 | shape: Sequence[int], 8 | generators: List[torch.Generator], 9 | device: torch.device, 10 | dtype: torch.dtype, 11 | ) -> torch.Tensor: 12 | 13 | if shape[0] % len(generators) != 0: 14 | raise ValueError( 15 | f"shape[0] ({shape[0]}) needs to be a multiple of len(generators) ({len(generators)})" 16 | ) 17 | 18 | latents = torch.cat( 19 | [ 20 | torch.rand( 21 | (1, *shape[1:]), 22 | generator=generator, 23 | device=generator.device, 24 | dtype=dtype, 25 | ) 26 | for generator in generators * (shape[0] // len(generators)) 27 | ], 28 | dim=0, 29 | ) 30 | 31 | return latents.to(device) 32 | 33 | 34 | def batched_randn( 35 | shape: Sequence[int], 36 | generators: List[torch.Generator], 37 | device: torch.device, 38 | dtype: torch.dtype, 39 | ) -> torch.Tensor: 40 | 41 | if shape[0] % len(generators) != 0: 42 | raise ValueError( 43 | f"shape[0] ({shape[0]}) needs to be a multiple of len(generators) ({len(generators)})" 44 | ) 45 | 46 | latents = torch.cat( 47 | [ 48 | torch.randn( 49 | (1, *shape[1:]), 50 | generator=generator, 51 | device=generator.device, 52 | dtype=dtype, 53 | ) 54 | for generator in generators * (shape[0] // len(generators)) 55 | ], 56 | dim=0, 57 | ) 58 | 59 | return latents.to(device) 60 | 61 | 62 | class TorchRandOverride: 63 | def __init__(self, generators): 64 | self.generators = generators 65 | 66 | def randn_like( 67 | self, 68 | input: torch.Tensor, 69 | *args, 70 | dtype: Optional[torch.dtype] = None, 71 | device=None, 72 | **kwargs, 73 | ): 74 | if input.shape[0] % len(self.generators) != 0: 75 | if dtype: 76 | kwargs["dtype"] = dtype 77 | if device: 78 | kwargs["device"] = device 79 | return torch.randn_like(input, *args, **kwargs) 80 | 81 | if device is None: 82 | device = input.device 83 | if dtype is None: 84 | dtype = input.dtype 85 | return batched_randn(input.shape, self.generators, device, dtype) 86 | 87 | def randint_like( 88 | self, 89 | input, 90 | *args, 91 | high=None, 92 | low=None, 93 | dtype=None, 94 | layout=torch.strided, 95 | device=None, 96 | **kwargs, 97 | ): 98 | if len(args) == 1: 99 | high = args[0] 100 | elif args: 101 | low = args[0] 102 | high = args[1] 103 | if low is None: 104 | low = 0 105 | 106 | if input.shape[0] % len(self.generators) != 0: 107 | print("Skip") 108 | return torch.randint_like( 109 | input, 110 | low=low, 111 | high=high, 112 | dtype=dtype, 113 | layout=layout, 114 | device=device, 115 | **kwargs, 116 | ) 117 | 118 | latents = torch.cat( 119 | [ 120 | torch.randint( 121 | size=(1, *input.shape[1:]), 122 | low=low, 123 | high=high, 124 | generator=generator, 125 | device=generator.device, 126 | dtype=dtype, 127 | ) 128 | for generator in self.generators 129 | ], 130 | dim=0, 131 | ) 132 | 133 | return latents.to(device) 134 | 135 | def __getattr__(self, item): 136 | return getattr(torch, item) 137 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/safety_checkers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel 6 | from transformers.feature_extraction_utils import FeatureExtractionMixin 7 | 8 | def cosine_distance(image_embeds, text_embeds): 9 | normalized_image_embeds = nn.functional.normalize(image_embeds) 10 | normalized_text_embeds = nn.functional.normalize(text_embeds) 11 | return 
torch.mm(normalized_image_embeds, normalized_text_embeds.t()) 12 | 13 | class FlagOnlySafetyChecker(PreTrainedModel): 14 | config_class = CLIPConfig 15 | 16 | def __init__(self, config: CLIPConfig): 17 | super().__init__(config) 18 | 19 | self.vision_model = CLIPVisionModel(config.vision_config) 20 | self.visual_projection = nn.Linear(config.vision_config.hidden_size, config.projection_dim, bias=False) 21 | 22 | self.concept_embeds = nn.Parameter(torch.ones(17, config.projection_dim), requires_grad=False) 23 | self.special_care_embeds = nn.Parameter(torch.ones(3, config.projection_dim), requires_grad=False) 24 | 25 | self.register_buffer("concept_embeds_weights", torch.ones(17)) 26 | self.register_buffer("special_care_embeds_weights", torch.ones(3)) 27 | 28 | def __str__(self): 29 | return "FlagOnlySafetyChecker" 30 | 31 | @torch.no_grad() 32 | def forward(self, clip_input, images): 33 | pooled_output = self.vision_model(clip_input)[1] # pooled_output 34 | image_embeds = self.visual_projection(pooled_output) 35 | 36 | special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds).cpu().numpy() 37 | cos_dist = cosine_distance(image_embeds, self.concept_embeds).cpu().numpy() 38 | 39 | result = [] 40 | batch_size = image_embeds.shape[0] 41 | for i in range(batch_size): 42 | result_img = {"special_scores": {}, "special_care": [], "concept_scores": {}, "bad_concepts": []} 43 | 44 | # increase this value to create a stronger `nfsw` filter 45 | # at the cost of increasing the possibility of filtering benign images 46 | adjustment = 0.0 47 | 48 | for concet_idx in range(len(special_cos_dist[0])): 49 | concept_cos = special_cos_dist[i][concet_idx] 50 | concept_threshold = self.special_care_embeds_weights[concet_idx].item() 51 | result_img["special_scores"][concet_idx] = round(concept_cos - concept_threshold + adjustment, 3) 52 | if result_img["special_scores"][concet_idx] > 0: 53 | result_img["special_care"].append({concet_idx, result_img["special_scores"][concet_idx]}) 54 | adjustment = 0.01 55 | 56 | for concet_idx in range(len(cos_dist[0])): 57 | concept_cos = cos_dist[i][concet_idx] 58 | concept_threshold = self.concept_embeds_weights[concet_idx].item() 59 | result_img["concept_scores"][concet_idx] = round(concept_cos - concept_threshold + adjustment, 3) 60 | if result_img["concept_scores"][concet_idx] > 0: 61 | result_img["bad_concepts"].append(concet_idx) 62 | 63 | result.append(result_img) 64 | 65 | has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result] 66 | return images, has_nsfw_concepts 67 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/schedulers/sample_dpmpp_2m.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from tqdm import trange 3 | 4 | 5 | @torch.no_grad() 6 | def sample_dpmpp_2m( 7 | model, 8 | x, 9 | sigmas, 10 | extra_args=None, 11 | callback=None, 12 | disable=None, 13 | warmup_lms=False, 14 | ddim_cutoff=0.0, 15 | ): 16 | """DPM-Solver++(2M).""" 17 | extra_args = {} if extra_args is None else extra_args 18 | s_in = x.new_ones([x.shape[0]]) 19 | sigma_fn = lambda t: t.neg().exp() 20 | t_fn = lambda sigma: sigma.log().neg() 21 | old_denoised = None 22 | 23 | for i in trange(len(sigmas) - 1, disable=disable): 24 | denoised = model(x, sigmas[i] * s_in, **extra_args) 25 | if callback is not None: 26 | callback( 27 | { 28 | "x": x, 29 | "i": i, 30 | "sigma": sigmas[i], 31 | "sigma_hat": sigmas[i], 32 | "denoised": denoised, 33 | } 34 | ) 35 | 
t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1]) 36 | h = t_next - t 37 | if old_denoised is None and warmup_lms: 38 | r = 1 / 2 39 | s = t + r * h 40 | x_2 = (sigma_fn(s) / sigma_fn(t)) * x - (-h * r).expm1() * denoised 41 | denoised_i = model(x_2, sigma_fn(s) * s_in, **extra_args) 42 | elif sigmas[i + 1] <= ddim_cutoff or old_denoised is None: 43 | denoised_i = denoised 44 | else: 45 | h_last = t - t_fn(sigmas[i - 1]) 46 | r = h_last / h 47 | denoised_i = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * old_denoised 48 | x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_i 49 | old_denoised = denoised 50 | return x 51 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/text_embedding/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .text_embedding import TextEmbedding 3 | from .basic_text_embedding import BasicTextEmbedding 4 | from .lpw_text_embedding import LPWTextEmbedding 5 | from .structured_text_embedding import StructuredTextEmbedding, KeyValueTensors 6 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/text_embedding/basic_text_embedding.py: -------------------------------------------------------------------------------- 1 | from diffusers.utils import logging 2 | 3 | logger = logging.get_logger(__name__) # pylint: disable=invalid-name 4 | 5 | from .text_embedding import TextEmbedding 6 | 7 | 8 | class BasicTextEmbedding(TextEmbedding): 9 | def __init__(self, pipe, text_encoder, **kwargs): 10 | super().__init__(pipe, text_encoder, **kwargs) 11 | 12 | def _get_embeddedings(self, strings, label): 13 | tokenizer = self.tokenizer 14 | 15 | max_length = min( 16 | tokenizer.model_max_length, 17 | self.text_encoder.config.max_position_embeddings, 18 | ) 19 | 20 | # get prompt text embeddings 21 | text_inputs = tokenizer( 22 | strings, 23 | padding="max_length", 24 | max_length=max_length, 25 | return_tensors="pt", 26 | ) 27 | text_input_ids = text_inputs.input_ids 28 | 29 | if text_input_ids.shape[-1] > max_length: 30 | removed_text = tokenizer.batch_decode(text_input_ids[:, max_length:]) 31 | logger.warning( 32 | f"The following part of your {label} input was truncated because CLIP can only handle sequences up to " 33 | f"{max_length} tokens: {removed_text}" 34 | ) 35 | text_input_ids = text_input_ids[:, :max_length] 36 | 37 | text_embeddings = self.text_encoder(text_input_ids.to(self.device)) 38 | 39 | return text_embeddings[0] 40 | 41 | def get_text_embeddings(self, prompt): 42 | return self._get_embeddedings(prompt.as_unweighted_string(), "prompt") 43 | 44 | def get_uncond_embeddings(self, prompt): 45 | return self._get_embeddedings(prompt.as_unweighted_string(), "negative prompt") 46 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/text_embedding/text_embedding.py: -------------------------------------------------------------------------------- 1 | class TextEmbedding: 2 | def __init__(self, pipe, text_encoder, **kwargs): 3 | self.pipe = pipe 4 | self.tokenizer = pipe.tokenizer 5 | self.text_encoder = text_encoder 6 | self.device = pipe.execution_device 7 | 8 | def get_text_embeddings(self, prompt): 9 | raise NotImplementedError("Not implemented") 10 | 11 | def get_uncond_embeddings(self, prompt): 12 | raise NotImplementedError("Not implemented") 13 | 14 | def get_embeddings(self, prompt, uncond_prompt=None): 15 | """Prompt and negative a both 
expected to be lists of strings, and matching in length""" 16 | text_embeddings = self.get_text_embeddings(prompt) 17 | uncond_embeddings = ( 18 | self.get_uncond_embeddings(uncond_prompt) 19 | if uncond_prompt is not None 20 | else None 21 | ) 22 | 23 | return (text_embeddings, uncond_embeddings) 24 | 25 | def repeat(self, embedding, count): 26 | bs_embed, seq_len, _ = embedding.shape 27 | embedding = embedding.repeat(1, count, 1) 28 | embedding = embedding.view(bs_embed * count, seq_len, -1) 29 | 30 | return embedding 31 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/text_embedding/text_encoder_alt_layer.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | 4 | class TextEncoderAltLayer: 5 | def __init__( 6 | self, 7 | text_encoder, 8 | layer: Literal["final", "penultimate"] | int = "final", 9 | ): 10 | self.text_encoder = text_encoder 11 | self.layer = layer 12 | 13 | def __call__(self, input_ids): 14 | text_embeddings = self.text_encoder( 15 | input_ids, 16 | output_hidden_states=(self.layer != "final"), 17 | return_dict=True, 18 | ) 19 | 20 | if self.layer == "final": 21 | res = text_embeddings.last_hidden_state 22 | elif self.layer == "penultimate": 23 | res = self.text_encoder.text_model.final_layer_norm( 24 | text_embeddings.hidden_states[-2] 25 | ) 26 | else: 27 | res = self.text_encoder.text_model.final_layer_norm( 28 | text_embeddings.hidden_states[self.layer] 29 | ) 30 | 31 | # text_encoder clients expect tuple of (final layer, pool) 32 | return (res, None) 33 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/cfg.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | 5 | from sdgrpcserver.pipeline.unet.types import ( 6 | EpsTensor, 7 | NoisePredictionUNet, 8 | ScheduleTimestep, 9 | XtTensor, 10 | ) 11 | 12 | 13 | @dataclass 14 | class CFGChildUnets: 15 | g: NoisePredictionUNet 16 | u: NoisePredictionUNet 17 | f: NoisePredictionUNet 18 | 19 | def wrap_all(self, wrapper, *args, **kwargs): 20 | return CFGChildUnets( 21 | g=wrapper(self.g, *args, **kwargs), 22 | u=wrapper(self.u, *args, **kwargs), 23 | f=wrapper(self.f, *args, **kwargs), 24 | ) 25 | 26 | 27 | class CFGUnet_Seperated: 28 | def __init__(self, cfg_unets: CFGChildUnets, guidance_scale, batch_total): 29 | self.cfg_unets = cfg_unets 30 | self.guidance_scale = guidance_scale 31 | self.batch_total = batch_total 32 | 33 | def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor: 34 | noise_pred_g = self.cfg_unets.g(latents, t) 35 | noise_pred_u = self.cfg_unets.u(latents, t) 36 | 37 | noise_pred = noise_pred_u + self.guidance_scale * (noise_pred_g - noise_pred_u) 38 | return noise_pred 39 | 40 | 41 | class CFGUnet: 42 | def __init__(self, cfg_unets: CFGChildUnets, guidance_scale, batch_total): 43 | self.cfg_unets = cfg_unets 44 | self.guidance_scale = guidance_scale 45 | self.batch_total = batch_total 46 | 47 | def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor: 48 | latents = torch.cat([latents, latents]) 49 | 50 | if isinstance(t, torch.Tensor) and t.shape: 51 | t = torch.cat([t, t]) 52 | 53 | noise_pred = self.cfg_unets.f(latents, t) 54 | noise_pred_u, noise_pred_g = noise_pred.chunk(2) 55 | 56 | noise_pred = noise_pred_u + self.guidance_scale * (noise_pred_g - noise_pred_u) 57 | return noise_pred 58 | 
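A minimal usage sketch (not part of the repository) of the two CFG wrappers defined in cfg.py above, assuming the classes from sdgrpcserver/pipeline/unet/cfg.py are importable; the toy eps-prediction callables are hypothetical stand-ins for real NoisePredictionUNet instances. Both wrappers should produce the same classifier-free-guidance result, noise_pred_u + guidance_scale * (noise_pred_g - noise_pred_u); CFGUnet_Seperated uses two forward passes, while CFGUnet doubles the batch and does one.

import torch

from sdgrpcserver.pipeline.unet.cfg import CFGChildUnets, CFGUnet, CFGUnet_Seperated


def toy_guided(latents, t):
    # stand-in for the conditioned (guided) eps prediction
    return latents * 0.9


def toy_uncond(latents, t):
    # stand-in for the unconditioned eps prediction
    return latents * 0.5


def toy_batched(latents, t):
    # batched stand-in: first half of the batch is unconditioned, second half conditioned
    uncond, guided = latents.chunk(2)
    return torch.cat([toy_uncond(uncond, t), toy_guided(guided, t)])


children = CFGChildUnets(g=toy_guided, u=toy_uncond, f=toy_batched)
latents = torch.randn(1, 4, 64, 64)

eps_separate = CFGUnet_Seperated(children, guidance_scale=7.5, batch_total=1)(latents, 999)
eps_batched = CFGUnet(children, guidance_scale=7.5, batch_total=1)(latents, 999)

# Both paths apply the same guidance formula, so the results should match
assert torch.allclose(eps_separate, eps_batched)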
-------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/core.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from sdgrpcserver.pipeline.unet.types import ( 4 | DiffusersUNet, 5 | EpsTensor, 6 | ScheduleTimestep, 7 | XtTensor, 8 | ) 9 | 10 | 11 | class UNetWithEmbeddings: 12 | def __init__(self, unet: DiffusersUNet, text_embeddings: torch.Tensor): 13 | self.unet = unet 14 | self.text_embeddings = text_embeddings 15 | 16 | def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor: 17 | return self.unet(latents, t, encoder_hidden_states=self.text_embeddings).sample 18 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/graft.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | 3 | import torch 4 | 5 | from sdgrpcserver.pipeline.easing import Easing 6 | from sdgrpcserver.pipeline.randtools import batched_rand 7 | from sdgrpcserver.pipeline.unet.types import ( 8 | DiffusersSchedulerUNet, 9 | GenericSchedulerUNet, 10 | KDiffusionSchedulerUNet, 11 | PX0Tensor, 12 | XtTensor, 13 | ) 14 | 15 | 16 | class GraftUnets(GenericSchedulerUNet): 17 | def __init__( 18 | self, 19 | unet_root: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 20 | unet_top: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 21 | generators: list[torch.Generator], 22 | ): 23 | self.unet_root = unet_root 24 | self.unet_top = unet_top 25 | self.generators = generators 26 | 27 | self.easing = Easing(floor=0, start=0.1, end=0.3, easing="sine") 28 | 29 | def __call__(self, latents: XtTensor, __step, u: float) -> PX0Tensor | XtTensor: 30 | p = self.easing.interp(u) 31 | 32 | if p <= 0: 33 | return self.unet_root(latents, __step, u=u) 34 | elif p >= 1: 35 | return self.unet_top(latents, __step, u=u) 36 | 37 | root = self.unet_root(latents, __step, u=u) 38 | top = self.unet_top(latents, __step, u=u) 39 | 40 | # Build a map of 0..1 like latents 41 | randmap = batched_rand(top.shape, self.generators, top.device, top.dtype) 42 | 43 | # Linear blend between base and graft 44 | res = cast(type(top), torch.where(randmap >= p, root, top)) 45 | 46 | return res 47 | 48 | @classmethod 49 | def merge_initial_latents(cls, left, right): 50 | return left 51 | 52 | @classmethod 53 | def split_result(cls, left, right): 54 | return right 55 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/hires_fix.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | 3 | import torch 4 | import torchvision.transforms as T 5 | 6 | from sdgrpcserver import resize_right 7 | from sdgrpcserver.pipeline.easing import Easing 8 | from sdgrpcserver.pipeline.randtools import batched_rand 9 | from sdgrpcserver.pipeline.unet.types import ( 10 | DiffusersSchedulerUNet, 11 | GenericSchedulerUNet, 12 | KDiffusionSchedulerUNet, 13 | PX0Tensor, 14 | XtTensor, 15 | ) 16 | 17 | # Indexes into a shape for the height and width dimensions 18 | # Negative indexed to work for any number of dimensions 19 | Hi, Wi = -2, -1 20 | 21 | 22 | def pad_like(latents, like, mode="replicate"): 23 | wd = like.shape[Wi] - latents.shape[Wi] 24 | hd = like.shape[Hi] - latents.shape[Hi] 25 | l = wd // 2 26 | r = wd - l 27 | t = hd // 2 28 | b = hd - t 29 | 30 | pad = torch.nn.functional.pad 31 | 32 | if isinstance(mode, int | float): 33 | 
return pad(latents, pad=(l, r, t, b), mode="constant", value=mode) 34 | else: 35 | return pad(latents, pad=(l, r, t, b), mode=mode) 36 | 37 | 38 | def resize_nearest(latents, scale_factor=1): 39 | hs = int(latents.shape[Hi] * scale_factor) 40 | ws = int(latents.shape[Wi] * scale_factor) 41 | 42 | return T.functional.resize(latents, [hs, ws], T.InterpolationMode.NEAREST) 43 | 44 | 45 | def scale_into(latents, target, scale): 46 | if scale >= 1: 47 | # latents = resize_right.resize(latents, scale_factors=scale, pad_mode="reflect") 48 | latents = resize_nearest(latents, scale) 49 | else: 50 | latents = resize_nearest(latents, scale) 51 | 52 | # Now crop off anything that's outside target shape, and offset if it's inside target shape 53 | 54 | # Positive is offset into the shape, negative is crop amount 55 | offh = (target.shape[Hi] - latents.shape[Hi]) // 2 56 | offw = (target.shape[Wi] - latents.shape[Wi]) // 2 57 | 58 | if offh < 0: 59 | latents = latents[:, :, -offh : -offh + target.shape[Hi], :] 60 | offh = 0 61 | 62 | if offw < 0: 63 | latents = latents[:, :, :, -offw : -offw + target.shape[Wi]] 64 | offw = 0 65 | 66 | target[ 67 | :, :, offh : offh + latents.shape[Hi], offw : offw + latents.shape[Wi] 68 | ] = latents 69 | return target 70 | 71 | 72 | def downscale_into(latents, target, oos_fraction): 73 | scale_min = min( 74 | target.shape[Hi] / latents.shape[Hi], target.shape[Wi] / latents.shape[Wi] 75 | ) 76 | scale_max = max( 77 | target.shape[Hi] / latents.shape[Hi], target.shape[Wi] / latents.shape[Wi] 78 | ) 79 | 80 | # At oos_fraction == 1, we want to downscale to completely contain the latent within 81 | # the square target - i.e. scale_min. At oos_fraction == 0 we want to downscale to 82 | # completely cover the square target - i.e. scale_max 83 | 84 | scale = scale_min * oos_fraction + scale_max * (1 - oos_fraction) 85 | return scale_into(latents, target, scale) 86 | 87 | 88 | def upscale_into(latents, target, oos_fraction): 89 | scale_min = min( 90 | target.shape[Hi] / latents.shape[Hi], target.shape[Wi] / latents.shape[Wi] 91 | ) 92 | scale_max = max( 93 | target.shape[Hi] / latents.shape[Hi], target.shape[Wi] / latents.shape[Wi] 94 | ) 95 | 96 | # At oos_fraction == 1, we want to upscale to completely cover the 97 | # target - i.e. scale_max. At oos_fraction = 0 we want to completely 98 | # fit square latent into OOS targe, i.e. 
scale_min 99 | 100 | scale = scale_max * oos_fraction + scale_min * (1 - oos_fraction) 101 | return scale_into(latents, target, scale) 102 | 103 | 104 | class HiresUnetWrapper(GenericSchedulerUNet): 105 | def __init__( 106 | self, 107 | unet_natural: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 108 | unet_hires: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 109 | generators: list[torch.Generator], 110 | natural_size: torch.Size, 111 | oos_fraction: float, 112 | latent_debugger, 113 | ): 114 | self.unet_natural = unet_natural 115 | self.unet_hires = unet_hires 116 | self.generators = generators 117 | self.natural_size = natural_size 118 | self.oos_fraction = oos_fraction 119 | 120 | self.easing = Easing(floor=0, start=0, end=0.4, easing="sine") 121 | self.latent_debugger = latent_debugger 122 | 123 | def __call__(self, latents: XtTensor, __step, u: float) -> PX0Tensor | XtTensor: 124 | # Linear blend between base and graft 125 | p = self.easing.interp(u) 126 | 127 | lo_in, hi_in = latents.chunk(2) 128 | 129 | if isinstance(__step, torch.Tensor) and __step.shape: 130 | lo_t, hi_t = __step.chunk(2) 131 | else: 132 | lo_t = hi_t = __step 133 | 134 | hi = self.unet_hires(hi_in, hi_t, u=u) 135 | 136 | # Early out if we're passed the graft stage 137 | if p >= 0.999: 138 | return cast(type(hi), torch.concat([lo_in, hi])) 139 | 140 | *_, h, w = latents.shape 141 | th, tw = self.natural_size 142 | 143 | offseth = (h - th) // 2 144 | offsetw = (w - tw) // 2 145 | 146 | lo_in = lo_in[:, :, offseth : offseth + th, offsetw : offsetw + tw] 147 | lo = self.unet_natural(lo_in, lo_t, u=u) 148 | 149 | # Downscale hi and merge into lo 150 | hi_downscaled = torch.zeros_like(lo) # Un-overlapped space is zero 151 | hi_downscaled = downscale_into(hi, hi_downscaled, self.oos_fraction) 152 | 153 | randmap = batched_rand(lo.shape, self.generators, lo.device, lo.dtype) 154 | lo_merged = torch.where(randmap >= p, lo, hi_downscaled) 155 | 156 | # Upscale lo and merge it back into hi 157 | lo_upscaled = hi.clone() # Un-overlapped space copied from hi 158 | lo_upscaled = upscale_into(lo, lo_upscaled, self.oos_fraction) 159 | 160 | randmap = batched_rand(hi.shape, self.generators, hi.device, hi.dtype) 161 | hi_merged = torch.where(randmap >= p, lo_upscaled, hi) 162 | 163 | # Expand lo back to full tensor size by wrapping with 0 164 | lo_expanded = torch.zeros_like(hi_merged) 165 | lo_expanded[:, :, offseth : offseth + th, offsetw : offsetw + tw] = lo_merged 166 | 167 | self.latent_debugger.log("hires_lo", int(u * 1000), lo_expanded[0:1]) 168 | self.latent_debugger.log("hires_hi", int(u * 1000), hi_merged[0:1]) 169 | 170 | res = torch.concat([lo_expanded, hi_merged]) 171 | return cast(type(hi), res) 172 | 173 | @classmethod 174 | def image_to_natural( 175 | cls, 176 | natural_size: int, 177 | image: torch.Tensor, 178 | oos_fraction: float, 179 | fill=torch.zeros, 180 | ): 181 | natural_image_size = (*image.shape[:-2], natural_size, natural_size) 182 | natural_image = fill(natural_image_size, device=image.device, dtype=image.dtype) 183 | 184 | downscale_into(image, natural_image, oos_fraction) 185 | return natural_image 186 | 187 | @classmethod 188 | def merge_initial_latents(cls, left, right): 189 | left_resized = torch.zeros_like(right) 190 | 191 | *_, th, tw = left.shape 192 | *_, h, w = right.shape 193 | 194 | offseth = (h - th) // 2 195 | offsetw = (w - tw) // 2 196 | 197 | left_resized[:, :, offseth : offseth + th, offsetw : offsetw + tw] = left 198 | right[:, :, offseth : offseth + th, offsetw : offsetw + 
tw] = left 199 | return torch.concat([left_resized, right]) 200 | 201 | @classmethod 202 | def split_result(cls, left, right): 203 | return right.chunk(2)[1] 204 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/hires_fix_other.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Sequence, cast 2 | 3 | import torch 4 | 5 | from sdgrpcserver import resize_right 6 | from sdgrpcserver.pipeline.easing import Easing 7 | from sdgrpcserver.pipeline.randtools import batched_rand 8 | from sdgrpcserver.pipeline.unet.types import ( 9 | DiffusersSchedulerUNet, 10 | EpsTensor, 11 | GenericSchedulerUNet, 12 | KDiffusionSchedulerUNet, 13 | PX0Tensor, 14 | ScheduleTimestep, 15 | XtTensor, 16 | ) 17 | 18 | # from sdgrpcserver.pipeline.unet.types import * 19 | 20 | 21 | def match_shape(latents: torch.Tensor, target: torch.Size): 22 | # If it's already the right size, just return it 23 | if latents.shape[-len(target) :] == target: 24 | return latents 25 | 26 | # Maybe scale it? 27 | scale = max(target[0] / latents.shape[2], target[1] / latents.shape[3]) 28 | if scale != 1: 29 | latents = resize_right.resize(latents, scale_factors=scale, pad_mode="reflect") 30 | 31 | # If we don't need to crop, skip that bit 32 | if latents.shape[-len(target) :] == target: 33 | return latents 34 | 35 | offset2 = (latents.shape[2] - target[0]) // 2 36 | offset3 = (latents.shape[3] - target[1]) // 2 37 | 38 | return latents[:, :, offset2 : offset2 + target[0], offset3 : offset3 + target[1]] 39 | 40 | 41 | class HiresUnetEpsWrapper: 42 | def __init__( 43 | self, 44 | parent: "HiresUnetWrapper", 45 | unet: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 46 | target: torch.Size, 47 | ): 48 | self.parent = parent 49 | self.unet = unet 50 | self.target = target 51 | 52 | def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor: 53 | if self.parent.mode == "lo": 54 | *_, h, w = latents.shape 55 | th, tw = self.target 56 | 57 | offseth = (h - th) // 2 58 | offsetw = (w - tw) // 2 59 | 60 | in_latents = latents[:, :, offseth : offseth + th, offsetw : offsetw + tw] 61 | 62 | res = self.unet(in_latents, t) 63 | 64 | expanded = torch.zeros( 65 | (*res.shape[:2], *latents.shape[2:]), 66 | dtype=latents.dtype, 67 | device=latents.device, 68 | ) 69 | expanded[:, :, offseth : offseth + th, offsetw : offsetw + tw] = res 70 | 71 | return expanded 72 | 73 | else: 74 | return self.unet(latents, t) 75 | 76 | 77 | class HiresUnetGenericWrapper(GenericSchedulerUNet): 78 | def __init__( 79 | self, 80 | parent: "HiresUnetWrapper", 81 | unet: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 82 | generators: list[torch.Generator], 83 | target: torch.Size, 84 | latent_debugger, 85 | ): 86 | self.parent = parent 87 | self.unet = unet 88 | self.generators = generators 89 | self.target = target 90 | 91 | self.easing = Easing(floor=0, start=0, end=0.3, easing="quartic") 92 | self.latent_debugger = latent_debugger 93 | 94 | def __call__(self, latents: XtTensor, __step, u: float) -> PX0Tensor | XtTensor: 95 | # Linear blend between base and graft 96 | p = self.easing.interp(u) 97 | 98 | lo_in, hi_in = latents.chunk(2) 99 | 100 | if isinstance(__step, torch.Tensor) and __step.shape: 101 | lo_t, hi_t = __step.chunk(2) 102 | else: 103 | lo_t = hi_t = __step 104 | 105 | self.parent.mode = "hi" 106 | hi = self.unet(hi_in, hi_t, u=u) 107 | 108 | # Early out if we're passed the graft stage 109 | if p >= 0.999: 110 | return 
cast(type(hi), torch.concat([lo_in, hi])) 111 | 112 | *_, h, w = latents.shape 113 | th, tw = self.target 114 | 115 | offseth = (h - th) // 2 116 | offsetw = (w - tw) // 2 117 | 118 | self.parent.mode = "lo" 119 | lo = self.unet(lo_in, lo_t, u=u)[ 120 | :, :, offseth : offseth + th, offsetw : offsetw + tw 121 | ] 122 | 123 | # Crop hi and merge it back into lo 124 | hi_crop = hi[:, :, offseth : offseth + th, offsetw : offsetw + tw] 125 | 126 | randmap = batched_rand(lo.shape, self.generators, lo.device, lo.dtype) 127 | lo_merged = torch.where(randmap >= p, lo, hi_crop) 128 | 129 | # Scale lo and merge it back into hi 130 | lo_scaled = match_shape(lo, hi.shape[-2:]) 131 | 132 | randmap = batched_rand(hi.shape, self.generators, hi.device, hi.dtype) 133 | hi_merged = torch.where(randmap >= p, lo_scaled, hi) 134 | 135 | # Expand lo back to full tensor size by wrapping with 0 136 | lo_expanded = torch.zeros_like(hi_merged) 137 | lo_expanded[:, :, offseth : offseth + th, offsetw : offsetw + tw] = lo_merged 138 | 139 | self.latent_debugger.log("hires_lo", int(u * 1000), lo_expanded[0:1]) 140 | self.latent_debugger.log("hires_hi", int(u * 1000), hi_merged[0:1]) 141 | 142 | res = torch.concat([lo_expanded, hi_merged]) 143 | return cast(type(hi), res) 144 | 145 | 146 | class HiresUnetWrapper: 147 | def __init__( 148 | self, 149 | generators: list[torch.Generator], 150 | target: torch.Size, 151 | latent_debugger, 152 | ): 153 | self.generators = generators 154 | self.target = target 155 | self.latent_debugger = latent_debugger 156 | 157 | self.mode: Literal["lo", "hi"] = "lo" 158 | 159 | def get_eps_wrapper(self, unet): 160 | return HiresUnetEpsWrapper(self, unet, self.target) 161 | 162 | def get_generic_wrapper(self, unet): 163 | return HiresUnetGenericWrapper( 164 | self, unet, self.generators, self.target, self.latent_debugger 165 | ) 166 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/hires_fix_resize.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | 3 | import torch 4 | import torchvision.transforms as T 5 | 6 | from sdgrpcserver import resize_right 7 | from sdgrpcserver.pipeline.easing import Easing 8 | from sdgrpcserver.pipeline.randtools import batched_rand 9 | from sdgrpcserver.pipeline.unet.types import ( 10 | DiffusersSchedulerUNet, 11 | GenericSchedulerUNet, 12 | KDiffusionSchedulerUNet, 13 | PX0Tensor, 14 | XtTensor, 15 | ) 16 | 17 | 18 | def match_shape(latents: torch.Tensor, target: torch.Size): 19 | # If it's already the right size, just return it 20 | if latents.shape[-len(target) :] == target: 21 | return latents 22 | 23 | # Maybe scale it? 
24 | scale = max(target[0] / latents.shape[2], target[1] / latents.shape[3]) 25 | if scale != 1: 26 | latents = resize_right.resize(latents, scale_factors=scale, pad_mode="reflect") 27 | 28 | # If we don't need to crop, skip that bit 29 | if latents.shape[-len(target) :] == target: 30 | return latents 31 | 32 | offset2 = (latents.shape[2] - target[0]) // 2 33 | offset3 = (latents.shape[3] - target[1]) // 2 34 | 35 | return latents[:, :, offset2 : offset2 + target[0], offset3 : offset3 + target[1]] 36 | 37 | 38 | class HiresUnetWrapper(GenericSchedulerUNet): 39 | def __init__( 40 | self, 41 | unet_natural: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 42 | unet_hires: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 43 | generators: list[torch.Generator], 44 | target: torch.Size, 45 | latent_debugger, 46 | ): 47 | self.unet_natural = unet_natural 48 | self.unet_hires = unet_hires 49 | self.generators = generators 50 | self.target = target 51 | 52 | self.easing = Easing(floor=0, start=0, end=0.4, easing="sine") 53 | self.latent_debugger = latent_debugger 54 | 55 | def __call__(self, latents: XtTensor, __step, u: float) -> PX0Tensor | XtTensor: 56 | # Linear blend between base and graft 57 | p = self.easing.interp(u) 58 | 59 | lo_in, hi_in = latents.chunk(2) 60 | 61 | if isinstance(__step, torch.Tensor) and __step.shape: 62 | lo_t, hi_t = __step.chunk(2) 63 | else: 64 | lo_t = hi_t = __step 65 | 66 | hi = self.unet_hires(hi_in, hi_t, u=u) 67 | 68 | # Early out if we're passed the graft stage 69 | if p >= 0.999: 70 | return cast(type(hi), torch.concat([lo_in, hi])) 71 | 72 | *_, h, w = latents.shape 73 | th, tw = self.target 74 | 75 | offseth = (h - th) // 2 76 | offsetw = (w - tw) // 2 77 | 78 | lo_in = lo_in[:, :, offseth : offseth + th, offsetw : offsetw + tw] 79 | lo = self.unet_natural(lo_in, lo_t, u=u) 80 | 81 | # Crop hi and merge it back into lo 82 | scale = min(tw / w, th / h) 83 | 84 | h_s = int(h * scale) 85 | w_s = int(w * scale) 86 | 87 | offseth2 = (th - h_s) // 2 88 | offsetw2 = (tw - w_s) // 2 89 | 90 | image_slice = ( 91 | slice(0, None), 92 | slice(0, None), 93 | slice(offseth2, offseth2 + h_s), 94 | slice(offsetw2, offsetw2 + w_s), 95 | ) 96 | 97 | hi_crop = torch.zeros_like(lo) 98 | # T.functional.resize(hi, [th, tw], T.InterpolationMode.NEAREST) 99 | hi_crop[image_slice] = T.functional.resize( 100 | hi, [h_s, w_s], T.InterpolationMode.NEAREST 101 | ) 102 | 103 | # hi_crop = hi[:, :, offseth : offseth + th, offsetw : offsetw + tw] 104 | 105 | randmap = batched_rand(lo.shape, self.generators, lo.device, lo.dtype) 106 | lo_merged = torch.where(randmap >= p, lo, hi_crop) 107 | 108 | # Scale lo and merge it back into hi 109 | lo_scaled = match_shape(lo, hi.shape[-2:]) 110 | 111 | randmap = batched_rand(hi.shape, self.generators, hi.device, hi.dtype) 112 | hi_merged = torch.where(randmap >= p, lo_scaled, hi) 113 | 114 | # Expand lo back to full tensor size by wrapping with 0 115 | lo_expanded = torch.zeros_like(hi_merged) 116 | lo_expanded[:, :, offseth : offseth + th, offsetw : offsetw + tw] = lo_merged 117 | 118 | self.latent_debugger.log("hires_lo", int(u * 1000), lo_expanded[0:1]) 119 | self.latent_debugger.log("hires_hi", int(u * 1000), hi_merged[0:1]) 120 | 121 | res = torch.concat([lo_expanded, hi_merged]) 122 | return cast(type(hi), res) 123 | 124 | @classmethod 125 | def image_to_natural(cls, natural_size: int, image: torch.Tensor, fill=torch.zeros): 126 | *_, height, width = image.shape 127 | scale = min(natural_size / width, natural_size / height) 128 | 129 | 
height_scaled = int(height * scale) 130 | width_scaled = int(width * scale) 131 | 132 | offseth = (natural_size - height_scaled) // 2 133 | offsetw = (natural_size - width_scaled) // 2 134 | 135 | image_slice = ( 136 | slice(0, None), 137 | slice(0, None), 138 | slice(offseth, offseth + height_scaled), 139 | slice(offsetw, offsetw + width_scaled), 140 | ) 141 | 142 | natural_image_size = (*image.shape[:-2], natural_size, natural_size) 143 | 144 | natural_image = fill(natural_image_size, device=image.device, dtype=image.dtype) 145 | 146 | natural_image[image_slice] = resize_right.resize( 147 | image, scale_factors=scale, pad_mode="reflect" 148 | ) 149 | 150 | return natural_image 151 | 152 | @classmethod 153 | def merge_initial_latents(cls, left, right): 154 | left_resized = torch.zeros_like(right) 155 | 156 | *_, th, tw = left.shape 157 | *_, h, w = right.shape 158 | 159 | offseth = (h - th) // 2 160 | offsetw = (w - tw) // 2 161 | 162 | left_resized[:, :, offseth : offseth + th, offsetw : offsetw + tw] = left 163 | return torch.concat([left_resized, right]) 164 | 165 | @classmethod 166 | def split_result(cls, left, right): 167 | return right.chunk(2)[1] 168 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/types.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from typing import NewType, Protocol, overload 3 | 4 | from torch import Tensor 5 | 6 | # Some types to describe the various structures of unet. First some return types 7 | 8 | # An Xt (ie a sample that includes some amount of noise) 9 | XtTensor = Tensor 10 | # The predicted noise in a sample (eps) 11 | EpsTensor = Tensor 12 | # The predicted X0 (i.e Xt - PredictedNoise) 13 | PX0Tensor = Tensor 14 | 15 | # Sigma 16 | ScheduleSigma = float | Tensor 17 | # Timestep (from 1000 to 0 usually) 18 | ScheduleTimestep = int | Tensor 19 | # Progress float, range [0..1) 20 | ScheduleProgress = NewType("Progress", float) 21 | 22 | 23 | # The Core Diffusers UNet 24 | class DiffusersUNetOutput(Protocol): 25 | sample: EpsTensor 26 | 27 | 28 | class DiffusersUNet(Protocol): 29 | @abstractmethod 30 | def __call__( 31 | self, latents: XtTensor, t: ScheduleTimestep, encoder_hidden_states: Tensor 32 | ) -> DiffusersUNetOutput: 33 | raise NotImplementedError 34 | 35 | 36 | # A Wrapped UNet where the hidden_state argument inside the wrapping 37 | class NoisePredictionUNet(Protocol): 38 | @abstractmethod 39 | def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor: 40 | raise NotImplementedError 41 | 42 | 43 | # A KDiffusion wrapped UNet 44 | class KDiffusionSchedulerUNet(Protocol): 45 | @abstractmethod 46 | def __call__(self, latents: XtTensor, sigma: ScheduleSigma, u: float) -> PX0Tensor: 47 | raise NotImplementedError 48 | 49 | 50 | class DiffusersSchedulerUNet(Protocol): 51 | @abstractmethod 52 | def __call__(self, latents: XtTensor, t: ScheduleTimestep, u: float) -> XtTensor: 53 | raise NotImplementedError 54 | 55 | 56 | class GenericSchedulerUNet: 57 | @overload 58 | @abstractmethod 59 | def __call__(self, latents: XtTensor, sigma: ScheduleSigma, u: float) -> PX0Tensor: 60 | pass 61 | 62 | @overload 63 | @abstractmethod 64 | def __call__(self, latents: XtTensor, t: ScheduleTimestep, u: float) -> XtTensor: 65 | pass 66 | 67 | @abstractmethod 68 | def __call__(self, latents: XtTensor, __step, u: float) -> PX0Tensor | XtTensor: 69 | raise NotImplementedError 70 | 
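To make the protocol layering in types.py concrete, here is a minimal sketch (not part of the repository) of a class that structurally satisfies the NoisePredictionUNet protocol, plus a helper that accepts anything matching it; the constant-eps behaviour and the apply_unet helper are purely illustrative and not how the pipeline denoises.

import torch
from torch import Tensor

from sdgrpcserver.pipeline.unet.types import (
    EpsTensor,
    NoisePredictionUNet,
    ScheduleTimestep,
    XtTensor,
)


class ConstantNoiseUNet:
    """Predicts the same eps at every step; a static checker accepts it as a NoisePredictionUNet."""

    def __init__(self, eps: Tensor):
        self.eps = eps

    def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor:
        # broadcast the stored eps to the latent shape
        return self.eps.expand_as(latents)


def apply_unet(unet: NoisePredictionUNet, latents: XtTensor, t: ScheduleTimestep) -> Tensor:
    # Any callable matching the protocol (e.g. UNetWithEmbeddings or CFGUnet from this
    # package) can be passed here; this toy step just subtracts the predicted noise.
    return latents - unet(latents, t)


latents = torch.randn(1, 4, 8, 8)
x0_ish = apply_unet(ConstantNoiseUNet(torch.zeros(1, 4, 1, 1)), latents, 999)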
-------------------------------------------------------------------------------- /sdgrpcserver/pipeline/vae_approximator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class VaeApproximator: 5 | """Decodes latent data to an approximate representation in RGB. 6 | Values determined experimentally for Stable Diffusion 1.4. 7 | See https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/2 8 | """ 9 | 10 | # grayscale_factors = torch.tensor([ 11 | # # R G B 12 | # [ 0.342, 0.341, 0.343 ], # L1 13 | # [ 0.342, 0.342, 0.340 ], # L2 14 | # [-0.110, -0.110, -0.113 ], # L3 15 | # [-0.208, -0.209, -0.208 ] # L4 16 | # ]) 17 | 18 | def __init__( 19 | self, device: torch.device | None = None, dtype: torch.dtype | None = None 20 | ): 21 | self.latent_rgb_factors = torch.tensor( 22 | [ 23 | # R G B 24 | [0.298, 0.207, 0.208], # L1 25 | [0.187, 0.286, 0.173], # L2 26 | [-0.158, 0.189, 0.264], # L3 27 | [-0.184, -0.271, -0.473], # L4 28 | ], 29 | dtype=dtype, 30 | device=device, 31 | ) 32 | 33 | @classmethod 34 | def for_pipeline(cls, pipeline): 35 | return cls(device=pipeline.execution_device, dtype=pipeline.unet.dtype) 36 | 37 | def __call__(self, latents): 38 | """Get an RGB JPEG representation of the latent data.""" 39 | self.to(latents.device, latents.dtype) 40 | return torch.einsum("...lhw,lr -> ...rhw", latents, self.latent_rgb_factors) 41 | 42 | def to(self, device, dtype): 43 | self.latent_rgb_factors = self.latent_rgb_factors.to(device, dtype) 44 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/xformers_utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch 4 | from diffusers.utils.import_utils import is_xformers_available 5 | 6 | 7 | @functools.cache 8 | def xformers_mea_available(): 9 | available = False 10 | 11 | if is_xformers_available(): 12 | try: 13 | from xformers.ops import memory_efficient_attention 14 | 15 | # Make sure we can run the memory efficient attention 16 | _ = memory_efficient_attention( 17 | torch.randn((1, 2, 40), device="cuda"), 18 | torch.randn((1, 2, 40), device="cuda"), 19 | torch.randn((1, 2, 40), device="cuda"), 20 | ) 21 | except Exception: 22 | pass 23 | else: 24 | available = True 25 | 26 | return available 27 | -------------------------------------------------------------------------------- /sdgrpcserver/ram_monitor.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | 4 | import psutil 5 | import pynvml 6 | 7 | 8 | def mb(v): 9 | return f"{v / 1024 / 1024 :.2f}MB" 10 | 11 | 12 | UPDATE_PERIOD = 0.001 13 | 14 | 15 | class RamMonitor(threading.Thread): 16 | stop_flag = False 17 | ram_current = 0 18 | ram_max_usage = 0 19 | vram_current = 0 20 | vram_max_usage = 0 21 | 22 | total = -1 23 | 24 | def __init__(self): 25 | threading.Thread.__init__(self) 26 | 27 | def run(self): 28 | ps = psutil.Process() 29 | 30 | self.loop_lock = threading.Lock() 31 | 32 | self.vram = False 33 | try: 34 | pynvml.nvmlInit() 35 | self.vram = True 36 | except: 37 | print("Unable to initialize NVIDIA management. No VRAM stats. 
\n") 38 | return 39 | 40 | print("Recording max memory usage...") 41 | 42 | self.ram_total = psutil.virtual_memory().total 43 | 44 | handle = pynvml.nvmlDeviceGetHandleByIndex(0) 45 | self.vram_total = pynvml.nvmlDeviceGetMemoryInfo(handle).total 46 | 47 | print(f"Total available RAM: {mb(self.ram_total)}, VRAM: {mb(self.vram_total)}") 48 | 49 | while not self.stop_flag: 50 | self.ram_current = ps.memory_info().rss 51 | self.ram_max_usage = max(self.ram_max_usage, self.ram_current) 52 | 53 | self.vram_current = pynvml.nvmlDeviceGetMemoryInfo(handle).used 54 | self.vram_max_usage = max(self.vram_max_usage, self.vram_current) 55 | 56 | if self.loop_lock.locked(): 57 | self.loop_lock.release() 58 | 59 | time.sleep(UPDATE_PERIOD) 60 | 61 | print("Stopped recording.") 62 | pynvml.nvmlShutdown() 63 | 64 | def print(self): 65 | # Wait for the update loop to run at least once 66 | self.loop_lock.acquire(timeout=0.5) 67 | print( 68 | f"Current RAM: {mb(self.ram_current)}, VRAM: {mb(self.vram_current)} | " 69 | f"Peak RAM: {mb(self.ram_max_usage)}, VRAM: {mb(self.vram_max_usage)}" 70 | ) 71 | 72 | def read(self): 73 | return dict( 74 | ram_max=self.ram_max_usage, 75 | ram_total=self.ram_total, 76 | vram_max=self.vram_max_usage, 77 | vram_total=self.vram_total, 78 | ) 79 | 80 | def read_and_reset(self): 81 | result = self.read() 82 | self.vram_current = self.ram_current = 0 83 | self.vram_max_usage = self.ram_max_usage = 0 84 | return result 85 | 86 | def stop(self): 87 | self.stop_flag = True 88 | 89 | def read_and_stop(self): 90 | self.stop() 91 | return self.read() 92 | -------------------------------------------------------------------------------- /sdgrpcserver/resize_right.py: -------------------------------------------------------------------------------- 1 | # Redirect to the embedded git submodule 2 | 3 | import sys 4 | from typing import TypeVar, cast 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from sdgrpcserver.src.ResizeRight import interp_methods 10 | 11 | sys.modules["interp_methods"] = interp_methods 12 | from sdgrpcserver.src.ResizeRight import resize_right # noqa: E402 13 | 14 | T = TypeVar("T", bound=torch.Tensor | np.ndarray) 15 | 16 | 17 | def resize( 18 | input: T, 19 | scale_factors=None, 20 | out_shape=None, 21 | interp_method=interp_methods.cubic, 22 | support_sz=None, 23 | antialiasing=True, 24 | by_convs=False, 25 | scale_tolerance=None, 26 | max_numerator=10, 27 | pad_mode="constant", 28 | ) -> T: 29 | result = resize_right.resize( 30 | input, 31 | scale_factors=scale_factors, 32 | out_shape=out_shape, 33 | interp_method=interp_method, 34 | support_sz=support_sz, 35 | antialiasing=antialiasing, 36 | by_convs=by_convs, 37 | scale_tolerance=scale_tolerance, 38 | max_numerator=max_numerator, 39 | pad_mode=pad_mode, 40 | ) 41 | 42 | if isinstance(result, torch.Tensor): 43 | result = result.to(cast(torch.Tensor, input).dtype) 44 | else: 45 | result = result.astype(cast(np.ndarray, input).dtype) 46 | 47 | return cast(T, result) 48 | -------------------------------------------------------------------------------- /sdgrpcserver/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/sdgrpcserver/services/__init__.py -------------------------------------------------------------------------------- /sdgrpcserver/services/dashboard.py: -------------------------------------------------------------------------------- 1 
| import dashboard_pb2, dashboard_pb2_grpc 2 | 3 | class DashboardServiceServicer(dashboard_pb2_grpc.DashboardServiceServicer): 4 | def __init__(self): 5 | pass 6 | 7 | def GetMe(self, request, context): 8 | user = dashboard_pb2.User() 9 | user.id="0000-0000-0000-0001" 10 | return user 11 | -------------------------------------------------------------------------------- /sdgrpcserver/services/engines.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import engines_pb2 4 | import engines_pb2_grpc 5 | import generation_pb2 6 | 7 | 8 | class EnginesServiceServicer(engines_pb2_grpc.EnginesServiceServicer): 9 | def __init__(self, manager): 10 | self._manager = manager 11 | 12 | def ListEngines(self, request, context): 13 | engines = engines_pb2.Engines() 14 | 15 | all_noise_types = [ 16 | generation_pb2.SAMPLER_NOISE_NORMAL, 17 | generation_pb2.SAMPLER_NOISE_BROWNIAN, 18 | ] 19 | normal_only = [generation_pb2.SAMPLER_NOISE_NORMAL] 20 | 21 | status = self._manager.getStatus() 22 | for engine in self._manager.engines: 23 | if not ( 24 | engine.get("id", False) 25 | and engine.get("enabled", False) 26 | and engine.get("visible", False) 27 | ): 28 | continue 29 | 30 | info = engines_pb2.EngineInfo() 31 | info.id = engine["id"] 32 | info.name = engine["name"] 33 | info.description = engine["description"] 34 | info.owner = "stable-diffusion-grpcserver" 35 | info.ready = status.get(engine["id"], False) 36 | info.type = engines_pb2.EngineType.PICTURE 37 | 38 | if info.ready: 39 | pipeline = self._manager._pipelines[engine["id"]] 40 | for k, v in pipeline.get_samplers().items(): 41 | if callable(v): 42 | args = set(inspect.signature(v).parameters.keys()) 43 | 44 | info.supported_samplers.append( 45 | engines_pb2.EngineSampler( 46 | sampler=k, 47 | supports_eta="eta" in args, 48 | supports_churn="churn" in args, 49 | supports_sigma_limits="sigmas" in args 50 | or "sigma_min" in args, 51 | supports_karras_rho="sigmas" in args, 52 | supported_noise_types=all_noise_types 53 | if "noise_sampler" in args 54 | else normal_only, 55 | ) 56 | ) 57 | else: 58 | args = set(inspect.signature(v.step).parameters.keys()) 59 | 60 | info.supported_samplers.append( 61 | engines_pb2.EngineSampler( 62 | sampler=k, supports_eta="eta" in args 63 | ) 64 | ) 65 | 66 | engines.engine.append(info) 67 | 68 | return engines 69 | -------------------------------------------------------------------------------- /sdgrpcserver/sonora/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/sdgrpcserver/sonora/__init__.py -------------------------------------------------------------------------------- /sdgrpcserver/sonora/__version__.py: -------------------------------------------------------------------------------- 1 | VERSION = (0, 1, 1) 2 | 3 | __version__ = ".".join(map(str, VERSION)) 4 | -------------------------------------------------------------------------------- /sdgrpcserver/sonora/aio.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import io 3 | 4 | import aiohttp 5 | import grpc.experimental.aio 6 | 7 | from sdgrpcserver.sonora import client 8 | from sdgrpcserver.sonora import protocol 9 | 10 | def insecure_web_channel(url): 11 | return WebChannel(url) 12 | 13 | 14 | class WebChannel: 15 | def __init__(self, url): 16 | if not url.startswith("http") and 
"://" not in url: 17 | url = f"http://{url}" 18 | 19 | self._url = url 20 | 21 | self._session = aiohttp.ClientSession() 22 | 23 | async def __aenter__(self): 24 | return self 25 | 26 | async def __aexit__(self, exception_type, exception_value, traceback): 27 | await self._session.close() 28 | 29 | def __await__(self): 30 | yield self 31 | 32 | def unary_unary(self, path, request_serializer, response_deserializer): 33 | return UnaryUnaryMulticallable( 34 | self._session, self._url, path, request_serializer, response_deserializer 35 | ) 36 | 37 | def unary_stream(self, path, request_serializer, response_deserializer): 38 | return UnaryStreamMulticallable( 39 | self._session, self._url, path, request_serializer, response_deserializer 40 | ) 41 | 42 | def stream_unary(self, path, request_serializer, response_deserializer): 43 | return client.NotImplementedMulticallable() 44 | 45 | def stream_stream(self, path, request_serializer, response_deserializer): 46 | return client.NotImplementedMulticallable() 47 | 48 | 49 | class UnaryUnaryMulticallable(client.Multicallable): 50 | def __call__(self, request, timeout=None, metadata=None): 51 | call_metadata = self._metadata.copy() 52 | if metadata is not None: 53 | call_metadata.extend(protocol.encode_headers(metadata)) 54 | 55 | return UnaryUnaryCall( 56 | request, 57 | timeout, 58 | call_metadata, 59 | self._rpc_url, 60 | self._session, 61 | self._serializer, 62 | self._deserializer, 63 | ) 64 | 65 | 66 | class UnaryStreamMulticallable(client.Multicallable): 67 | def __call__(self, request, timeout=None, metadata=None): 68 | call_metadata = self._metadata.copy() 69 | if metadata is not None: 70 | call_metadata.extend(protocol.encode_headers(metadata)) 71 | 72 | return UnaryStreamCall( 73 | request, 74 | timeout, 75 | call_metadata, 76 | self._rpc_url, 77 | self._session, 78 | self._serializer, 79 | self._deserializer, 80 | ) 81 | 82 | 83 | class Call(client.Call): 84 | def __enter__(self): 85 | return self 86 | 87 | def __exit__(self, exception_type, exception_value, traceback): 88 | if self._response and not self._response.closed: 89 | self._response.close() 90 | 91 | def __del__(self): 92 | if self._response and not self._response.closed: 93 | self._response.close() 94 | 95 | async def _get_response(self): 96 | if self._response is None: 97 | timeout = aiohttp.ClientTimeout(total=self._timeout) 98 | 99 | self._response = await self._session.post( 100 | self._url, 101 | data=protocol.wrap_message( 102 | False, False, self._serializer(self._request) 103 | ), 104 | headers=dict(self._metadata), 105 | timeout=timeout, 106 | ) 107 | 108 | protocol.raise_for_status(self._response.headers) 109 | 110 | return self._response 111 | 112 | async def initial_metadata(self): 113 | response = await self._get_response() 114 | return response.headers.items() 115 | 116 | async def trailing_metadata(self): 117 | return self._trailers 118 | 119 | 120 | class UnaryUnaryCall(Call): 121 | @Call._raise_timeout(asyncio.TimeoutError) 122 | def __await__(self): 123 | response = yield from self._get_response().__await__() 124 | 125 | data = yield from response.read().__await__() 126 | 127 | response.release() 128 | 129 | if not data: 130 | return 131 | 132 | buffer = io.BytesIO(data) 133 | 134 | messages = protocol.unwrap_message_stream(buffer) 135 | 136 | trailers, _, message = next(messages) 137 | 138 | if trailers: 139 | self._trailers = protocol.unpack_trailers(message) 140 | return 141 | else: 142 | result = self._deserializer(message) 143 | 144 | try: 145 | trailers, 
_, message = next(messages) 146 | except StopIteration: 147 | pass 148 | else: 149 | if trailers: 150 | self._trailers = protocol.unpack_trailers(message) 151 | else: 152 | raise ValueError("UnaryUnary should only return a single message") 153 | 154 | protocol.raise_for_status(response.headers) 155 | 156 | return result 157 | 158 | 159 | class UnaryStreamCall(Call): 160 | @Call._raise_timeout(asyncio.TimeoutError) 161 | async def read(self): 162 | response = await self._get_response() 163 | 164 | async for trailers, _, message in protocol.unwrap_message_stream_async( 165 | response.content 166 | ): 167 | if trailers: 168 | self._trailers = protocol.unpack_trailers(message) 169 | break 170 | else: 171 | return self._deserializer(message) 172 | 173 | response.release() 174 | 175 | protocol.raise_for_status(response.headers, self._trailers) 176 | 177 | return grpc.experimental.aio.EOF 178 | 179 | @Call._raise_timeout(asyncio.TimeoutError) 180 | async def __aiter__(self): 181 | response = await self._get_response() 182 | 183 | async for trailers, _, message in protocol.unwrap_message_stream_async( 184 | response.content 185 | ): 186 | if trailers: 187 | self._trailers = protocol.unpack_trailers(message) 188 | break 189 | else: 190 | yield self._deserializer(message) 191 | 192 | response.release() 193 | 194 | protocol.raise_for_status(response.headers, self._trailers) 195 | -------------------------------------------------------------------------------- /sdgrpcserver/sonora/echotest.py: -------------------------------------------------------------------------------- 1 | from google.protobuf.duration_pb2 import Duration 2 | from sdgrpcserver.sonora import client 3 | from echo.echo import echo_pb2_grpc, echo_pb2 4 | 5 | c = client.insecure_web_channel("http://localhost:8888") 6 | x = echo_pb2_grpc.EchoServiceStub(c) 7 | d = Duration(seconds=1) 8 | 9 | for r in x.ServerStreamingEcho( 10 | echo_pb2.ServerStreamingEchoRequest( 11 | message="honk", message_count=10, message_interval=d 12 | ) 13 | ): 14 | print(r) 15 | -------------------------------------------------------------------------------- /sdgrpcserver/sonora/protocol.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import functools 3 | import struct 4 | from urllib.parse import unquote 5 | 6 | import grpc 7 | 8 | 9 | _HEADER_FORMAT = ">BI" 10 | _HEADER_LENGTH = struct.calcsize(_HEADER_FORMAT) 11 | 12 | 13 | def _pack_header_flags(trailers, compressed): 14 | return (trailers << 7) | (compressed) 15 | 16 | 17 | def _unpack_header_flags(flags): 18 | trailers = 1 << 7 19 | compressed = 1 20 | 21 | return bool(trailers & flags), bool(compressed & flags) 22 | 23 | 24 | def wrap_message(trailers, compressed, message): 25 | return ( 26 | struct.pack( 27 | _HEADER_FORMAT, _pack_header_flags(trailers, compressed), len(message) 28 | ) 29 | + message 30 | ) 31 | 32 | 33 | def b64_wrap_message(trailers, compressed, message): 34 | return base64.b64encode(wrap_message(trailers, compressed, message)) 35 | 36 | 37 | def unwrap_message(message): 38 | flags, length = struct.unpack(_HEADER_FORMAT, message[:_HEADER_LENGTH]) 39 | data = message[_HEADER_LENGTH : _HEADER_LENGTH + length] 40 | 41 | if length != len(data): 42 | raise ValueError() 43 | 44 | trailers, compressed = _unpack_header_flags(flags) 45 | 46 | return trailers, compressed, data 47 | 48 | 49 | def b64_unwrap_message(message): 50 | return unwrap_message(base64.b64decode(message)) 51 | 52 | 53 | def unwrap_message_stream(stream): 54 | data 
= stream.read(_HEADER_LENGTH) 55 | 56 | while data: 57 | flags, length = struct.unpack(_HEADER_FORMAT, data) 58 | trailers, compressed = _unpack_header_flags(flags) 59 | 60 | yield trailers, compressed, stream.read(length) 61 | 62 | if trailers: 63 | break 64 | 65 | data = stream.read(_HEADER_LENGTH) 66 | 67 | 68 | async def unwrap_message_stream_async(stream): 69 | data = await stream.readexactly(_HEADER_LENGTH) 70 | 71 | while data: 72 | flags, length = struct.unpack(_HEADER_FORMAT, data) 73 | trailers, compressed = _unpack_header_flags(flags) 74 | 75 | yield trailers, compressed, await stream.readexactly(length) 76 | 77 | if trailers: 78 | break 79 | 80 | data = await stream.readexactly(_HEADER_LENGTH) 81 | 82 | 83 | async def unwrap_message_asgi(receive, decoder=None): 84 | buffer = bytearray() 85 | waiting = False 86 | flags = None 87 | length = None 88 | 89 | while True: 90 | event = await receive() 91 | assert event["type"].startswith("http.") 92 | 93 | if decoder: 94 | chunk = decoder(event["body"]) 95 | else: 96 | chunk = event["body"] 97 | 98 | buffer += chunk 99 | 100 | if len(buffer) >= _HEADER_LENGTH: 101 | if not waiting: 102 | flags, length = struct.unpack(_HEADER_FORMAT, buffer[:_HEADER_LENGTH]) 103 | 104 | if len(buffer) >= _HEADER_LENGTH + length: 105 | waiting = False 106 | data = buffer[_HEADER_LENGTH : _HEADER_LENGTH + length] 107 | trailers, compressed = _unpack_header_flags(flags) 108 | 109 | yield trailers, compressed, data 110 | buffer = buffer[_HEADER_LENGTH + length :] 111 | else: 112 | waiting = True 113 | 114 | if not event.get("more_body"): 115 | break 116 | 117 | 118 | b64_unwrap_message_asgi = functools.partial( 119 | unwrap_message_asgi, decoder=base64.b64decode 120 | ) 121 | 122 | 123 | def pack_trailers(trailers): 124 | message = [] 125 | for k, v in trailers: 126 | k = k.lower() 127 | message.append(f"{k}: {v}\r\n".encode("ascii")) 128 | return b"".join(message) 129 | 130 | 131 | def unpack_trailers(message): 132 | trailers = [] 133 | for line in message.decode("ascii").splitlines(): 134 | k, v = line.split(":", 1) 135 | v = v.strip() 136 | 137 | trailers.append((k, v)) 138 | return trailers 139 | 140 | 141 | def encode_headers(metadata): 142 | for header, value in metadata: 143 | if isinstance(value, bytes): 144 | if not header.endswith("-bin"): 145 | raise ValueError("binary headers must have the '-bin' suffix") 146 | 147 | value = base64.b64encode(value).decode("ascii") 148 | 149 | if isinstance(header, bytes): 150 | header = header.decode("ascii") 151 | 152 | yield header, value 153 | 154 | 155 | class WebRpcError(grpc.RpcError): 156 | _code_to_enum = {code.value[0]: code for code in grpc.StatusCode} # type: ignore 157 | 158 | def __init__(self, code, details, *args, **kwargs): 159 | super(WebRpcError, self).__init__(*args, **kwargs) 160 | 161 | self._code = code 162 | self._details = details 163 | 164 | @classmethod 165 | def from_metadata(cls, trailers): 166 | status = int(trailers["grpc-status"]) 167 | details = trailers.get("grpc-message") 168 | 169 | code = cls._code_to_enum[status] 170 | 171 | return cls(code, details) 172 | 173 | def __str__(self): 174 | return "WebRpcError(status_code={}, details='{}')".format( 175 | self._code, self._details 176 | ) 177 | 178 | def code(self): 179 | return self._code 180 | 181 | def details(self): 182 | return self._details 183 | 184 | 185 | def raise_for_status(headers, trailers=None): 186 | if trailers: 187 | metadata = dict(trailers) 188 | else: 189 | metadata = headers 190 | 191 | if "grpc-status" in 
metadata and metadata["grpc-status"] != "0": 192 | metadata = metadata.copy() 193 | 194 | if "grpc-message" in metadata: 195 | metadata["grpc-message"] = unquote(metadata["grpc-message"]) 196 | 197 | raise WebRpcError.from_metadata(metadata) 198 | 199 | 200 | _timeout_units = { 201 | b"H": 3600.0, 202 | b"M": 60.0, 203 | b"S": 1.0, 204 | b"m": 1 / 1000.0, 205 | b"u": 1 / 1000000.0, 206 | b"n": 1 / 1000000000.0, 207 | } 208 | 209 | 210 | def parse_timeout(value): 211 | units = value[-1:] 212 | coef = _timeout_units[units] 213 | count = int(value[:-1]) 214 | return count * coef 215 | 216 | 217 | def serialize_timeout(seconds): 218 | if seconds % 3600 == 0: 219 | value = seconds // 3600 220 | units = "H" 221 | elif seconds % 60 == 0: 222 | value = seconds // 60 223 | units = "M" 224 | elif seconds % 1 == 0: 225 | value = seconds 226 | units = "S" 227 | elif seconds * 1000 % 1 == 0: 228 | value = seconds * 1000 229 | units = "m" 230 | elif seconds * 1000000 % 1 == 0: 231 | value = seconds * 1000000 232 | units = "u" 233 | else: 234 | value = seconds * 1000000000 235 | units = "n" 236 | 237 | return f"{int(value)}{units}" 238 | -------------------------------------------------------------------------------- /sdgrpcserver/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/sdgrpcserver/src/__init__.py -------------------------------------------------------------------------------- /sdgrpcserver/utils.py: -------------------------------------------------------------------------------- 1 | 2 | from array import ArrayType 3 | import io 4 | import PIL 5 | from PIL import PngImagePlugin 6 | import numpy as np 7 | import cv2 as cv 8 | import torch 9 | 10 | import generation_pb2 11 | 12 | from sdgrpcserver import images 13 | from sdgrpcserver.pipeline.vae_approximator import VaeApproximator 14 | 15 | def artifact_to_image(artifact): 16 | if artifact.type == generation_pb2.ARTIFACT_IMAGE or artifact.type == generation_pb2.ARTIFACT_MASK: 17 | img = PIL.Image.open(io.BytesIO(artifact.binary)) 18 | return img 19 | else: 20 | raise NotImplementedError("Can't convert that artifact to an image") 21 | 22 | def image_to_artifact(im, artifact_type=generation_pb2.ARTIFACT_IMAGE, meta=None): 23 | binary=None 24 | 25 | if isinstance(im, torch.Tensor): 26 | im = images.toPIL(im)[0] 27 | 28 | if isinstance(im, PIL.Image.Image): 29 | buf = io.BytesIO() 30 | info = PngImagePlugin.PngInfo() 31 | if meta: 32 | for k, v in meta.items(): info.add_text(k, v) 33 | im.save(buf, format='PNG', pnginfo=info) 34 | buf.seek(0) 35 | binary=buf.getvalue() 36 | else: 37 | binary=cv.imencode(".png", im)[1] 38 | 39 | return generation_pb2.Artifact( 40 | type=artifact_type, 41 | binary=binary, 42 | mime="image/png" 43 | ) 44 | 45 | class CallbackImageWrapper: 46 | def __init__(self, callback, device, dtype): 47 | self.callback = callback 48 | self.vae_approximator = VaeApproximator() 49 | 50 | def __call__(self, i, t, latents): 51 | pixels = self.vae_approximator(latents) 52 | pixels = (pixels / 2 + 0.5).clamp(0, 1) 53 | self.callback(i, t, pixels) 54 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | from sdgrpcserver import server 2 | 3 | if __name__ == "__main__": 4 | server.main() 5 | 
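An illustrative aside before the test fixtures below (this sketch is not part of the repository snapshot; it only assumes the package is importable under the sdgrpcserver.sonora path shown in the listing): the gRPC-web framing implemented in sdgrpcserver/sonora/protocol.py is a one-byte flag field (bit 7 marks a trailers frame, bit 0 a compressed frame) followed by a big-endian 32-bit length and the payload, and timeouts are carried as an integer count plus a unit suffix.

import io
from sdgrpcserver.sonora import protocol

# A data frame: flags byte 0x00, four-byte big-endian length, then the payload.
frame = protocol.wrap_message(False, False, b"hello")
assert frame == b"\x00\x00\x00\x00\x05hello"
assert protocol.unwrap_message(frame) == (False, False, b"hello")

# Trailers travel as a final frame with the high bit of the flags byte set.
trailer_frame = protocol.wrap_message(True, False, protocol.pack_trailers([("grpc-status", "0")]))
assert trailer_frame[0] == 0x80

# unwrap_message_stream yields frames in order and stops after the trailers frame.
for is_trailers, _, message in protocol.unwrap_message_stream(io.BytesIO(frame + trailer_frame)):
    if is_trailers:
        print(protocol.unpack_trailers(message))  # [('grpc-status', '0')]

# grpc-timeout header values: count plus unit letter (H, M, S, m, u, n); parsing expects bytes.
assert protocol.serialize_timeout(90) == "90S"
assert protocol.parse_timeout(b"500m") == 0.5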
-------------------------------------------------------------------------------- /stablecabal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/stablecabal.png -------------------------------------------------------------------------------- /tests/VRAMUsageMonitor.py: -------------------------------------------------------------------------------- 1 | 2 | import pynvml 3 | import threading 4 | import time 5 | 6 | class VRAMUsageMonitor(threading.Thread): 7 | stop_flag = False 8 | max_usage = 0 9 | total = -1 10 | 11 | def __init__(self): 12 | threading.Thread.__init__(self) 13 | 14 | def run(self): 15 | try: 16 | pynvml.nvmlInit() 17 | except: 18 | print(f"Unable to initialize NVIDIA management. No memory stats. \n") 19 | return 20 | print(f"Recording max memory usage...\n") 21 | handle = pynvml.nvmlDeviceGetHandleByIndex(0) 22 | self.total = pynvml.nvmlDeviceGetMemoryInfo(handle).total 23 | print(f"Total memory available {self.total}") 24 | 25 | while not self.stop_flag: 26 | m = pynvml.nvmlDeviceGetMemoryInfo(handle) 27 | self.max_usage = max(self.max_usage, m.used) 28 | # print(self.max_usage) 29 | time.sleep(0.1) 30 | print(f"Stopped recording.\n") 31 | pynvml.nvmlShutdown() 32 | 33 | def read(self): 34 | return self.max_usage, self.total 35 | 36 | def read_and_reset(self): 37 | max_usage = self.max_usage 38 | self.max_usage = 0 39 | return max_usage, self.total 40 | 41 | def stop(self): 42 | self.stop_flag = True 43 | 44 | def read_and_stop(self): 45 | self.stop_flag = True 46 | return self.max_usage, self.total 47 | -------------------------------------------------------------------------------- /tests/batch_independance.py: -------------------------------------------------------------------------------- 1 | from test_harness import TestHarness, VRAMUsageMonitor, ALGORITHMS 2 | import os, sys, re, time 3 | from types import SimpleNamespace as SN 4 | 5 | import torch 6 | 7 | from sdgrpcserver import images 8 | import generation_pb2, generation_pb2_grpc 9 | 10 | class TestRunner(TestHarness): 11 | """ 12 | Tests to make sure that the unified pipeline is batch-independent. 13 | 14 | Batch-independent means that we should get the same results for a single batch of four images, 15 | two batches of two images each, or four batches of a single image each, so long as the seeds are the same.
16 | 17 | e.g. [1,2,3,4] == [1,2], [3,4] == [1], [2], [3], [4] 18 | 19 | This should be true both for batches created by a single prompt with num_images_per_prompt, 20 | and for multiple prompts. 21 | """ 22 | 23 | 24 | 25 | def params(self, **extra): 26 | return { 27 | "height": 512, 28 | "width": 512, 29 | "guidance_scale": 7.5, 30 | "sampler": ALGORITHMS["plms"], 31 | "eta": 0, 32 | "num_inference_steps": 50, 33 | "seed": -1, 34 | "strength": 0.8, 35 | **extra 36 | } 37 | 38 | def test(self): 39 | with open("image.png", "rb") as file: 40 | test_image = file.read() 41 | image = images.fromPngBytes(test_image).to(self.manager.mode.device) 42 | 43 | with open("mask.png", "rb") as file: 44 | test_mask = file.read() 45 | mask = images.fromPngBytes(test_mask).to(self.manager.mode.device) 46 | 47 | def gen(args, prompts, seeds, tag): 48 | args = { 49 | **args, 50 | "prompt": prompts, 51 | "seed": seeds, 52 | "num_images_per_prompt": len(seeds) // len(prompts) 53 | } 54 | 55 | suffix=f"{mode}{'_clip' if args['clip_guidance_scale'] > 0 else ''}_{tag}_" 56 | 57 | self.save_output(suffix, self.get_pipeline().generate(**args)[0]) 58 | 59 | for mode in ["txt2img", "img2img", "inpaint"]: 60 | for clip_guidance in [1.0, 0]: 61 | 62 | kwargs = self.params(clip_guidance_scale = clip_guidance) 63 | if mode == "img2img" or mode == "inpaint": kwargs["init_image"] = image 64 | if mode == "inpaint": kwargs["mask_image"] = mask 65 | 66 | # Most common is going to be num_images_per_prompt, so check that first 67 | 68 | for i, seed in enumerate([[420420420, 420420421], [420420420], [420420421]]): 69 | gen(kwargs, ["A Crocodile"], seed, f"croc{i}") 70 | 71 | # Then check 2 prompts and 2 images per prompt - first all four 72 | 73 | seed = [420420420, 420420421, 520520520, 520520521] 74 | gen(kwargs, ["A Crocodile", "A Shark"], seed, f"both") 75 | 76 | # Then create the two sharks independently 77 | 78 | for i, seed in enumerate([[520520520], [520520521]]): 79 | gen(kwargs, ["A Shark"], seed, f"shark{i}") 80 | 81 | 82 | runner = TestRunner(engine_path="engines.clip.yaml", prefix=f"bi", vramO=2) 83 | runner.run() 84 | -------------------------------------------------------------------------------- /tests/engines.basic.yaml: -------------------------------------------------------------------------------- 1 | - id: "testengine" 2 | default: True 3 | enabled: True 4 | visible: True 5 | name: "Test Engine" 6 | description: "Test Engine" 7 | class: "UnifiedPipeline" 8 | model: "runwayml/stable-diffusion-v1-5" 9 | use_auth_token: True 10 | local_model: "./stable-diffusion-v1-5" 11 | local_model_fp16: "./stable-diffusion-v1-5-fp16" 12 | -------------------------------------------------------------------------------- /tests/engines.clip.yaml: -------------------------------------------------------------------------------- 1 | - id: "testengine" 2 | default: True 3 | enabled: True 4 | visible: True 5 | name: "Test Engine" 6 | description: "Test Engine" 7 | class: "UnifiedPipeline" 8 | model: "runwayml/stable-diffusion-v1-5" 9 | use_auth_token: True 10 | local_model: "./stable-diffusion-v1-5" 11 | local_model_fp16: "./stable-diffusion-v1-5-fp16" 12 | overrides: 13 | inpaint_unet: 14 | model: "runwayml/stable-diffusion-inpainting" 15 | subfolder: "unet" 16 | clip_model: laion/CLIP-ViT-B-32-laion2B-s34B-b79K 17 | feature_extractor: laion/CLIP-ViT-B-32-laion2B-s34B-b79K 18 | -------------------------------------------------------------------------------- /tests/engines.inpaint.yaml: 
-------------------------------------------------------------------------------- 1 | - id: "testengine" 2 | default: True 3 | enabled: True 4 | visible: True 5 | name: "Test Engine" 6 | description: "Test Engine" 7 | class: "UnifiedPipeline" 8 | model: "runwayml/stable-diffusion-v1-5" 9 | use_auth_token: True 10 | local_model: "./stable-diffusion-v1-5" 11 | local_model_fp16: "./stable-diffusion-v1-5-fp16" 12 | overrides: 13 | inpaint_unet: 14 | model: "runwayml/stable-diffusion-inpainting" 15 | local_model: "./stable-diffusion-inpainting" 16 | local_model_fp16: "./stable-diffusion-inpainting-fp16" 17 | subfolder: "unet" 18 | 19 | -------------------------------------------------------------------------------- /tests/engines.sd14.yaml: -------------------------------------------------------------------------------- 1 | - id: "testengine" 2 | default: True 3 | enabled: True 4 | visible: True 5 | name: "Test Engine" 6 | description: "Test Engine" 7 | class: "UnifiedPipeline" 8 | model: "CompVis/stable-diffusion-v1-4" 9 | use_auth_token: True 10 | local_model: "./stable-diffusion-v1-4" 11 | local_model_fp16: "./stable-diffusion-v1-4-fp16" 12 | -------------------------------------------------------------------------------- /tests/graftedpaint.engine.yaml: -------------------------------------------------------------------------------- 1 | - model_id: 'inkpunk-diffusion' 2 | model: "Envvi/Inkpunk-Diffusion" 3 | has_fp16: False 4 | 5 | - model_id: 'sd2-inpainting' 6 | model: 'stabilityai/stable-diffusion-2-inpainting' 7 | whitelist: ["unet", "text_encoder"] 8 | 9 | - model_id: "laion-clip-b" 10 | type: "clip" 11 | model: "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" 12 | has_fp16: False 13 | - model_id: "laion-clip-l" 14 | type: "clip" 15 | model: "laion/CLIP-ViT-L-14-laion2B-s32B-b82K" 16 | has_fp16: False 17 | 18 | - id: "justinkpunk" 19 | default: True 20 | enabled: True 21 | name: "Just Inkpunk" 22 | description: "Just Inkpunk" 23 | class: "UnifiedPipeline" 24 | model: "@inkpunk-diffusion" 25 | overrides: 26 | clip: 27 | model: "@laion-clip-b" 28 | 29 | - id: "withsd2inpaint" 30 | default: True 31 | enabled: True 32 | name: "Test Engine" 33 | description: "Test Engine" 34 | class: "UnifiedPipeline" 35 | model: "@inkpunk-diffusion" 36 | overrides: 37 | inpaint_unet: 38 | model: "@sd2-inpainting/unet" 39 | inpaint_text_encoder: 40 | model: "@sd2-inpainting/text_encoder" 41 | clip: 42 | model: "@laion-clip-b" 43 | 44 | -------------------------------------------------------------------------------- /tests/graftedpaint.image_512_512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/graftedpaint.image_512_512.png -------------------------------------------------------------------------------- /tests/graftedpaint.image_768_600.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/graftedpaint.image_768_600.png -------------------------------------------------------------------------------- /tests/graftedpaint.image_768_768.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/graftedpaint.image_768_768.png 
-------------------------------------------------------------------------------- /tests/graftedpaint.py: -------------------------------------------------------------------------------- 1 | """ 2 | isort:skip_file 3 | """ 4 | from test_harness import TestHarness, ALGORITHMS 5 | from sdgrpcserver import images 6 | from PIL import Image, ImageOps 7 | 8 | 9 | class TestRunner(TestHarness): 10 | """ 11 | Tests to ensure grafted inpaint works 12 | """ 13 | 14 | def params(self, **extra): 15 | return { 16 | "height": 768, 17 | "width": 768, 18 | "guidance_scale": 7.5, 19 | "sampler": ALGORITHMS["k_euler_ancestral"], 20 | "churn": 0.4, 21 | "karras_rho": 7, 22 | "num_inference_steps": 64, 23 | "seed": 420420420, 24 | "strength": 1.0, 25 | **extra, 26 | } 27 | 28 | def testres(self, width, height): 29 | test_image = Image.open(f"graftedpaint.image_{width}_{height}.png") 30 | 31 | # Split into 3 channels 32 | r, g, b, a = test_image.split() 33 | # Recombine back to RGB image 34 | test_image = Image.merge("RGB", (r, g, b)) 35 | test_mask = Image.merge("RGB", (a, a, a)) 36 | test_mask = ImageOps.invert(test_mask) 37 | 38 | image = images.fromPIL(test_image).to(self.manager.mode.device) 39 | mask = images.fromPIL(test_mask).to(self.manager.mode.device) 40 | 41 | def gen(args, engine, grafted, tag): 42 | pipeline = self.get_pipeline(engine) 43 | pipeline._pipeline.set_options({"grafted_inpaint": grafted}) 44 | self.save_output(f"{tag}_{width}_{height}", pipeline.generate(**args)[0]) 45 | 46 | kwargs = self.params( 47 | width=width, 48 | height=height, 49 | init_image=image, 50 | mask_image=mask, 51 | prompt=["An nvinkpunk cat wearing a spacesuit stares at a large moon"], 52 | seed=[420420420, 420420421, 420420422, 420420423], 53 | num_images_per_prompt=4, 54 | ) 55 | 56 | gen(kwargs, "justinkpunk", False, "ink") 57 | gen(kwargs, "withsd2inpaint", False, "sd2") 58 | gen(kwargs, "withsd2inpaint", True, "graft") 59 | 60 | clipargs = dict(**kwargs, clip_guidance_scale=0.5) 61 | clipargs["num_inference_steps"] = 96 62 | 63 | gen(clipargs, "justinkpunk", False, "clip_ink") 64 | gen(clipargs, "withsd2inpaint", False, "clip_sd2") 65 | gen(clipargs, "withsd2inpaint", True, "clip_graft") 66 | 67 | def test(self): 68 | self.testres(512, 512) 69 | self.testres(768, 600) 70 | self.testres(768, 768) 71 | 72 | 73 | runner = TestRunner( 74 | engine_path="graftedpaint.engine.yaml", prefix="graftedpaint", vramO=3 75 | ) 76 | runner.run() 77 | -------------------------------------------------------------------------------- /tests/happy_path.engines.yaml: -------------------------------------------------------------------------------- 1 | 2 | - model_id: "sd1" 3 | model: "runwayml/stable-diffusion-v1-5" 4 | local_model: "/weights/stable-diffusion-v1-5" 5 | local_model_fp16: "/weights/stable-diffusion-v1-5-fp16" 6 | use_auth_token: True 7 | 8 | - model_id: "sd1-inpainting" 9 | whitelist: "unet" 10 | model: "runwayml/stable-diffusion-inpainting" 11 | local_model: "/weights/stable-diffusion-inpainting" 12 | local_model_fp16: "/weights/stable-diffusion-inpainting-fp16" 13 | whitelist: ["unet", "text_encoder"] 14 | use_auth_token: True 15 | 16 | - model_id: 'sd2' 17 | model: 'stabilityai/stable-diffusion-2-base' 18 | 19 | - model_id: 'sd2-inpainting' 20 | model: 'stabilityai/stable-diffusion-2-inpainting' 21 | whitelist: ["unet", "text_encoder"] 22 | 23 | - model_id: 'sd2v' 24 | model: 'stabilityai/stable-diffusion-2' 25 | 26 | - model_id: "laion-clip-b" 27 | type: "clip" 28 | model: "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" 29 
| has_fp16: False 30 | 31 | - id: "sd1" 32 | enabled: True 33 | name: "Stable Diffusion 1.5" 34 | description: "Stable Diffusion 1.5" 35 | class: "UnifiedPipeline" 36 | model: "@sd1" 37 | overrides: 38 | inpaint_unet: 39 | model: "@sd1-inpainting/unet" 40 | clip: 41 | model: "@laion-clip-b" 42 | 43 | - id: "sd2" 44 | enabled: True 45 | name: "Stable Diffusion 2" 46 | description: "Stable Diffusion 2" 47 | class: "UnifiedPipeline" 48 | model: "@sd2" 49 | overrides: 50 | inpaint_unet: 51 | model: "@sd2-inpainting/unet" 52 | clip: 53 | model: "@laion-clip-b" 54 | 55 | - id: "sd2v" 56 | enabled: True 57 | name: "Stable Diffusion 2 VPred" 58 | description: "Stable Diffusion 2" 59 | class: "UnifiedPipeline" 60 | model: "@sd2v" 61 | overrides: 62 | clip: 63 | model: "@laion-clip-b" 64 | 65 | -------------------------------------------------------------------------------- /tests/happy_path.image_512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/happy_path.image_512.png -------------------------------------------------------------------------------- /tests/happy_path.image_768.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/happy_path.image_768.png -------------------------------------------------------------------------------- /tests/happy_path.py: -------------------------------------------------------------------------------- 1 | import io 2 | import re 3 | from collections import OrderedDict 4 | 5 | from PIL import Image, ImageOps 6 | from test_harness import ALGORITHMS, TestHarness, VRAMUsageMonitor, generation_pb2 7 | 8 | from sdgrpcserver import images 9 | 10 | 11 | def load_masked_image(path): 12 | test_image = Image.open(path) 13 | 14 | # Split into 3 channels 15 | r, g, b, a = test_image.split() 16 | # Recombine back to RGB image 17 | test_image = Image.merge("RGB", (r, g, b)) 18 | test_mask = Image.merge("RGB", (a, a, a)) 19 | test_mask = ImageOps.invert(test_mask) 20 | 21 | with io.BytesIO() as output: 22 | test_image.save(output, format="PNG") 23 | test_image_png = output.getvalue() 24 | 25 | with io.BytesIO() as output: 26 | test_mask.save(output, format="PNG") 27 | test_mask_png = output.getvalue() 28 | 29 | return test_image_png, test_mask_png 30 | 31 | 32 | args = OrderedDict() 33 | args["sampler"] = [ 34 | {"sampler": "ddim", "eta": 0}, 35 | {"sampler": "ddim", "eta": 0.8}, 36 | {"sampler": "plms"}, 37 | {"sampler": "k_lms"}, 38 | {"sampler": "k_euler"}, 39 | {"sampler": "k_euler_ancestral"}, 40 | {"sampler": "k_heun"}, 41 | {"sampler": "k_dpm_2"}, 42 | {"sampler": "k_dpm_2_ancestral"}, 43 | {"sampler": "dpm_fast"}, 44 | {"sampler": "dpm_adaptive"}, 45 | {"sampler": "dpmspp_1"}, 46 | {"sampler": "dpmspp_2"}, 47 | {"sampler": "dpmspp_3"}, 48 | {"sampler": "dpmspp_2s_ancestral"}, 49 | {"sampler": "dpmspp_sde"}, 50 | {"sampler": "dpmspp_2m"}, 51 | ] 52 | args["image"] = [ 53 | {}, 54 | {"image": True, "strength": 0.25}, 55 | {"image": True, "strength": 0.5}, 56 | {"image": True, "strength": 0.75}, 57 | {"image": True, "mask": True, "strength": 0.5}, 58 | {"image": True, "mask": True, "strength": 1}, 59 | { 60 | "image": True, 61 | "mask": True, 62 | "strength": 1.5, 63 | }, 64 | ] 65 | args["engine"] = [{"engine": "sd1"}, {"engine": "sd2"}, {"engine": "sd2v"}] 66 | 
67 | 68 | image_by_size = { 69 | 512: load_masked_image("happy_path.image_512.png"), 70 | 768: load_masked_image("happy_path.image_768.png"), 71 | } 72 | 73 | 74 | class TestRunner(TestHarness): 75 | def __init__(self, combos, *args, **kwargs): 76 | super().__init__(*args, **kwargs) 77 | self.combos = combos 78 | 79 | def engine(self, item, request, prompt, parameters): 80 | request.engine_id = item["engine"] 81 | 82 | if item["engine"] == "sd2v": 83 | request.image.width = 768 84 | request.image.height = 768 85 | 86 | def sampler(self, item, request, prompt, parameters): 87 | request.image.transform.diffusion = ALGORITHMS[item["sampler"]] 88 | 89 | eta = item.get("eta", None) 90 | if eta != None: 91 | parameters.sampler.eta = eta 92 | 93 | def image(self, item, request, prompt, parameters): 94 | image, mask = image_by_size[request.image.height] 95 | 96 | if item.get("image", False): 97 | prompt.append( 98 | generation_pb2.Prompt( 99 | parameters=generation_pb2.PromptParameters(init=True), 100 | artifact=generation_pb2.Artifact( 101 | type=generation_pb2.ARTIFACT_IMAGE, binary=image 102 | ), 103 | ) 104 | ) 105 | 106 | parameters.schedule.start = item["strength"] 107 | parameters.schedule.end = 0.01 108 | 109 | if item.get("mask", False): 110 | prompt.append( 111 | generation_pb2.Prompt( 112 | artifact=generation_pb2.Artifact( 113 | type=generation_pb2.ARTIFACT_MASK, binary=mask 114 | ) 115 | ) 116 | ) 117 | 118 | def build_combinations(self, args, idx): 119 | if idx == len(args.keys()) - 1: 120 | key = list(args.keys())[idx] 121 | return [{key: item} for item in args[key]] 122 | 123 | key = list(args.keys())[idx] 124 | result = [] 125 | 126 | for item in args[key]: 127 | result += [ 128 | {**combo, key: item} for combo in self.build_combinations(args, idx + 1) 129 | ] 130 | 131 | return result 132 | 133 | def test(self): 134 | combinations = self.build_combinations(self.combos, 0) 135 | 136 | for combo in combinations: 137 | request_id = re.sub("[^\w]+", "_", repr(combo)) 138 | request_id = request_id.strip("_") 139 | 140 | prompt = [ 141 | generation_pb2.Prompt( 142 | text="Award wining DSLR photo of a shark in the deep ocean, f2/8 35mm Portra 400, highly detailed, trending on artstation" 143 | ) 144 | ] 145 | 146 | parameters = generation_pb2.StepParameter() 147 | 148 | request = generation_pb2.Request( 149 | engine_id="testengine", 150 | request_id=request_id, 151 | prompt=[], 152 | image=generation_pb2.ImageParameters( 153 | height=512, 154 | width=512, 155 | seed=[420420420], # It's the funny number 156 | steps=50, 157 | samples=1, 158 | parameters=[], 159 | ), 160 | ) 161 | 162 | if ( 163 | combo["sampler"]["sampler"] == "plms" 164 | and combo["engine"]["engine"] == "sd2v" 165 | ): 166 | continue 167 | 168 | for key, item in combo.items(): 169 | getattr(self, key)(item, request, prompt, parameters) 170 | 171 | for part in prompt: 172 | request.prompt.append(part) 173 | 174 | request.image.parameters.append(parameters) 175 | 176 | self.save_output(request_id, self.call_generator(request)) 177 | 178 | 179 | monitor = VRAMUsageMonitor() 180 | monitor.start() 181 | 182 | stats = {} 183 | 184 | for vramO in range(4): 185 | instance = TestRunner( 186 | engine_path="happy_path.engines.yaml", 187 | combos=args, 188 | prefix=f"hp_{vramO}", 189 | vramO=vramO, 190 | monitor=monitor, 191 | ) 192 | stats[f"run vram-optimisation-level={vramO}"] = instance.run() 193 | 194 | monitor.stop() 195 | 196 | print("Stats") 197 | print(repr(stats)) 198 | 
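An illustrative aside (not part of the repository snapshot): build_combinations in tests/happy_path.py above walks the OrderedDict of option lists and returns one dict per combination, keyed by option group, which is the same set of combinations itertools.product would give. A minimal sketch with a reduced option set, reusing names that appear in happy_path.py:

from collections import OrderedDict
from itertools import product

# Reduced option set; the keys and values mirror the ones used in happy_path.py.
options = OrderedDict()
options["sampler"] = [{"sampler": "ddim", "eta": 0}, {"sampler": "k_lms"}]
options["engine"] = [{"engine": "sd1"}, {"engine": "sd2"}]

# One dict per combination, keyed by option group: 2 x 2 = 4 combinations here.
combos = [dict(zip(options.keys(), values)) for values in product(*options.values())]
assert len(combos) == 4
assert combos[0] == {"sampler": {"sampler": "ddim", "eta": 0}, "engine": {"engine": "sd1"}}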
-------------------------------------------------------------------------------- /tests/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/image.png -------------------------------------------------------------------------------- /tests/mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/mask.png -------------------------------------------------------------------------------- /tests/prompt_weights.engine.yaml: -------------------------------------------------------------------------------- 1 | - model_id: 'sd2' 2 | model: 'stabilityai/stable-diffusion-2-base' 3 | 4 | - model_id: "laion-clip-b" 5 | type: "clip" 6 | model: "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" 7 | has_fp16: False 8 | - model_id: "laion-clip-h" 9 | type: "clip" 10 | model: "laion/CLIP-ViT-H-14-laion2B-s32B-b79K" 11 | has_fp16: False 12 | 13 | - id: "testengine" 14 | default: True 15 | enabled: True 16 | name: "Test Engine" 17 | description: "Test Engine" 18 | class: "UnifiedPipeline" 19 | model: "@sd2" 20 | overrides: 21 | clip: 22 | model: "@laion-clip-h" 23 | -------------------------------------------------------------------------------- /tests/prompt_weights.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from test_harness import ALGORITHMS, TestHarness 3 | 4 | 5 | class TestRunner(TestHarness): 6 | """ 7 | Tests to ensure grafted inpaint works 8 | """ 9 | 10 | def params(self, **extra): 11 | return { 12 | "height": 512, 13 | "width": 512, 14 | "guidance_scale": 7.5, 15 | "sampler": ALGORITHMS["k_euler"], 16 | "num_inference_steps": 64, 17 | "seed": 420420420, 18 | **extra, 19 | } 20 | 21 | def test(self): 22 | def gen(args, tag): 23 | pipeline = self.get_pipeline() 24 | self.save_output(f"{tag}", pipeline.generate(**args)[0]) 25 | 26 | for i in np.linspace(-0.5, 0.5, 5): 27 | prompt_tokens = [ 28 | ( 29 | "So let me tell you a story. One day I was walking under the summer sun. " 30 | "I had decided I wanted to take the evening air, and so had left the house around 5pm. " 31 | "It had been raining earlier, but in this golden hour the sun was gently warm against my skin. " 32 | "As I rounded a corner I had not walked around before I came across a wonderful sight. 
", 33 | 1.0, 34 | ), 35 | ("A DSLR photo of a meadow filled with ", 1.0), 36 | ("daisies", 1.0 + i), 37 | (" and ", 1.0), 38 | ("tulips", 1.0 - i), 39 | (", f/2.8 35mm Portra 400", 1.0), 40 | ] 41 | 42 | kwargs = self.params( 43 | prompt=[prompt_tokens], 44 | ) 45 | 46 | gen(kwargs, f"{i}") 47 | 48 | clipargs = dict(**kwargs, clip_guidance_scale=0.5) 49 | clipargs["num_inference_steps"] = 96 50 | 51 | gen(clipargs, f"clip_{i}") 52 | 53 | 54 | runner = TestRunner( 55 | engine_path="prompt_weights.engine.yaml", prefix="prompt_weights", vramO=3 56 | ) 57 | runner.run() 58 | -------------------------------------------------------------------------------- /tests/schedulers.py: -------------------------------------------------------------------------------- 1 | from test_harness import TestHarness, VRAMUsageMonitor, ALGORITHMS 2 | import os, sys, re, time 3 | from types import SimpleNamespace as SN 4 | 5 | import torch 6 | 7 | import generation_pb2, generation_pb2_grpc 8 | 9 | class TestRunner(TestHarness): 10 | 11 | def params(self, **extra): 12 | return { 13 | "height": 512, 14 | "width": 512, 15 | "guidance_scale": 7.5, 16 | "sampler": ALGORITHMS["k_euler_ancestral"], 17 | "eta": 0, 18 | "num_inference_steps": 50, 19 | "seed": -1, 20 | "strength": 0.8, 21 | **extra 22 | } 23 | 24 | def test(self): 25 | prompt = 'anime girl holding a giant NVIDIA Tesla A100 GPU graphics card, Anime Blu-Ray boxart, super high detail' 26 | seed = self.string_to_seed('hlky') 27 | 28 | for name, sampler in ALGORITHMS.items(): 29 | kwargs = self.params(sampler=sampler, seed=seed) 30 | self.save_output(name, self.get_pipeline('testengine').generate(prompt=prompt, **kwargs)[0]) 31 | 32 | runner = TestRunner(engine_path="engines.sd14.yaml", prefix=f"seed", vramO=2) 33 | runner.run() 34 | -------------------------------------------------------------------------------- /tests/test_harness.py: -------------------------------------------------------------------------------- 1 | """ 2 | isort:skip_file 3 | """ 4 | 5 | import os, sys, re, time, inspect, random 6 | 7 | import yaml 8 | 9 | try: 10 | from yaml import CLoader as Loader, CDumper as Dumper 11 | except ImportError: 12 | from yaml import Loader, Dumper 13 | 14 | import torch 15 | 16 | basePath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | sys.path.append(basePath) 18 | 19 | # This line adds the various other module paths into the import searchpath 20 | from sdgrpcserver.server import main 21 | 22 | from sdgrpcserver.services.generate import GenerationServiceServicer 23 | from sdgrpcserver.manager import EngineMode, EngineManager 24 | from sdgrpcserver import images 25 | 26 | import generation_pb2 27 | 28 | from VRAMUsageMonitor import VRAMUsageMonitor 29 | 30 | ALGORITHMS = { 31 | "ddim": generation_pb2.SAMPLER_DDIM, 32 | "plms": generation_pb2.SAMPLER_DDPM, 33 | "k_euler": generation_pb2.SAMPLER_K_EULER, 34 | "k_euler_ancestral": generation_pb2.SAMPLER_K_EULER_ANCESTRAL, 35 | "k_heun": generation_pb2.SAMPLER_K_HEUN, 36 | "k_dpm_2": generation_pb2.SAMPLER_K_DPM_2, 37 | "k_dpm_2_ancestral": generation_pb2.SAMPLER_K_DPM_2_ANCESTRAL, 38 | "k_lms": generation_pb2.SAMPLER_K_LMS, 39 | "dpm_fast": generation_pb2.SAMPLER_DPM_FAST, 40 | "dpm_adaptive": generation_pb2.SAMPLER_DPM_ADAPTIVE, 41 | "dpmspp_1": generation_pb2.SAMPLER_DPMSOLVERPP_1ORDER, 42 | "dpmspp_2": generation_pb2.SAMPLER_DPMSOLVERPP_2ORDER, 43 | "dpmspp_3": generation_pb2.SAMPLER_DPMSOLVERPP_3ORDER, 44 | "dpmspp_2s_ancestral": generation_pb2.SAMPLER_DPMSOLVERPP_2S_ANCESTRAL, 45 | 
"dpmspp_sde": generation_pb2.SAMPLER_DPMSOLVERPP_SDE, 46 | "dpmspp_2m": generation_pb2.SAMPLER_DPMSOLVERPP_2M, 47 | } 48 | 49 | 50 | class FakeContext: 51 | def __init__(self, monitor): 52 | self.monitor = monitor 53 | 54 | def add_callback(self, callback): 55 | pass 56 | 57 | def set_code(self, code): 58 | print("Test failed") 59 | self.monitor.stop() 60 | sys.exit(-1) 61 | 62 | def set_details(self, code): 63 | pass 64 | 65 | 66 | class TestHarness: 67 | def __init__(self, engine_path, vramO=2, monitor=None, prefix=None): 68 | self.monitor_is_ours = False 69 | 70 | if monitor is None: 71 | self.monitor_is_ours = True 72 | monitor = VRAMUsageMonitor() 73 | 74 | self.monitor = monitor 75 | 76 | self.prefix = self.__class__ if prefix is None else prefix 77 | 78 | with open(os.path.normpath(engine_path), "r") as cfg: 79 | engines = yaml.load(cfg, Loader=Loader) 80 | 81 | self.manager = EngineManager( 82 | engines, 83 | weight_root="../weights/", 84 | mode=EngineMode( 85 | vram_optimisation_level=vramO, enable_cuda=True, enable_mps=False 86 | ), 87 | nsfw_behaviour="ignore", 88 | refresh_on_error=True, 89 | ) 90 | 91 | self.manager.loadPipelines() 92 | 93 | def get_pipeline(self, id="testengine"): 94 | return self.manager.getPipe(id) 95 | 96 | def call_generator(self, request): 97 | generator = GenerationServiceServicer(self.manager) 98 | context = FakeContext(self.monitor) 99 | 100 | return generator.Generate(request, context) 101 | 102 | def string_to_seed(self, string): 103 | return random.Random(string).randint(0, 2**32 - 1) 104 | 105 | def _flatten_outputs(self, output): 106 | if isinstance(output, list) or inspect.isgenerator(output): 107 | for item in output: 108 | yield from self._flatten_outputs(item) 109 | 110 | elif isinstance(output, torch.Tensor): 111 | if len(output.shape) == 4 and output.shape[0] > 1: 112 | yield from output.chunk(output.shape[0], dim=0) 113 | else: 114 | yield output 115 | 116 | elif isinstance(output, generation_pb2.Answer): 117 | yield from self._flatten_outputs( 118 | [ 119 | artifact 120 | for artifact in output.artifacts 121 | if artifact.type == generation_pb2.ARTIFACT_IMAGE 122 | ] 123 | ) 124 | 125 | else: 126 | yield output 127 | 128 | def save_output(self, suffix, output): 129 | 130 | for i, output in enumerate(self._flatten_outputs(output)): 131 | path = ( 132 | f"out/{self.prefix}_{suffix}_{i}.png" 133 | if i is not None 134 | else f"out/{self.prefix}_{suffix}.png" 135 | ) 136 | 137 | if isinstance(output, torch.Tensor): 138 | binary = images.toPngBytes(output)[0] 139 | with open(path, "wb") as f: 140 | f.write(binary) 141 | 142 | elif isinstance(output, generation_pb2.Artifact): 143 | with open(path, "wb") as f: 144 | f.write(output.binary) 145 | 146 | else: 147 | raise ValueError( 148 | f"Don't know how to handle output of class {output.__class__}" 149 | ) 150 | 151 | def run(self): 152 | if self.monitor_is_ours: 153 | self.monitor.start() 154 | 155 | self.monitor.read_and_reset() 156 | start_time = time.monotonic() 157 | print("Running....") 158 | self.test() 159 | end_time = time.monotonic() 160 | used, total = self.monitor.read_and_reset() 161 | 162 | if self.monitor_is_ours: 163 | self.monitor.stop() 164 | 165 | runstats = {"vramused": used, "time": end_time - start_time} 166 | print("Run complete", repr(runstats)) 167 | 168 | return runstats 169 | -------------------------------------------------------------------------------- /weights/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/weights/.gitkeep --------------------------------------------------------------------------------